From 852318449f2ab8e0bfaeca9904b90936d751d177 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Sat, 15 Jun 2024 20:17:43 +0200 Subject: [PATCH 001/661] Check perf tests --- src/Common/PODArray_fwd.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Common/PODArray_fwd.h b/src/Common/PODArray_fwd.h index d570a90e467..bd780eb51b0 100644 --- a/src/Common/PODArray_fwd.h +++ b/src/Common/PODArray_fwd.h @@ -17,12 +17,12 @@ constexpr size_t integerRoundUp(size_t value, size_t dividend) } template , size_t pad_right_ = 0, + typename TAllocator = Allocator, size_t pad_right_ = 0, size_t pad_left_ = 0> class PODArray; /** For columns. Padding is enough to read and write xmm-register at the address of the last element. */ -template > +template > using PaddedPODArray = PODArray; /** A helper for declaring PODArray that uses inline memory. @@ -32,6 +32,6 @@ using PaddedPODArray = PODArray using PODArrayWithStackMemory = PODArray, rounded_bytes, alignof(T)>>; + AllocatorWithStackMemory, rounded_bytes, alignof(T)>>; } From 7568de2202a7fa99539d8d46092315bf3c7fe5e6 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Sat, 15 Jun 2024 22:03:53 +0100 Subject: [PATCH 002/661] Revert "Check perf tests" This reverts commit 852318449f2ab8e0bfaeca9904b90936d751d177. --- src/Common/PODArray_fwd.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Common/PODArray_fwd.h b/src/Common/PODArray_fwd.h index bd780eb51b0..d570a90e467 100644 --- a/src/Common/PODArray_fwd.h +++ b/src/Common/PODArray_fwd.h @@ -17,12 +17,12 @@ constexpr size_t integerRoundUp(size_t value, size_t dividend) } template , size_t pad_right_ = 0, + typename TAllocator = Allocator, size_t pad_right_ = 0, size_t pad_left_ = 0> class PODArray; /** For columns. Padding is enough to read and write xmm-register at the address of the last element. */ -template > +template > using PaddedPODArray = PODArray; /** A helper for declaring PODArray that uses inline memory. @@ -32,6 +32,6 @@ using PaddedPODArray = PODArray using PODArrayWithStackMemory = PODArray, rounded_bytes, alignof(T)>>; + AllocatorWithStackMemory, rounded_bytes, alignof(T)>>; } From 57d036e5899b52fd4fdab9447630e01bf3d5382b Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Sat, 15 Jun 2024 22:04:24 +0100 Subject: [PATCH 003/661] impl --- tests/performance/scripts/perf.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/performance/scripts/perf.py b/tests/performance/scripts/perf.py index e98c158249a..94f145d82db 100755 --- a/tests/performance/scripts/perf.py +++ b/tests/performance/scripts/perf.py @@ -427,6 +427,8 @@ for query_index in queries_to_run: for conn_index, c in enumerate(this_query_connections): try: + c.execute("SYSTEM JEMALLOC PURGE") + res = c.execute( q, query_id=run_id, From 38537a00aa9e7185b69c066cd6809c54487ecf4e Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 18 Jun 2024 18:16:12 +0000 Subject: [PATCH 004/661] Make ActionsDAGPtr unique_ptr. 
--- src/Core/InterpolateDescription.cpp | 2 +- src/Core/InterpolateDescription.h | 2 +- src/Functions/indexHint.h | 4 +- src/Interpreters/ActionsDAG.cpp | 46 ++++++++------- src/Interpreters/ActionsDAG.h | 11 ++-- src/Interpreters/ActionsVisitor.cpp | 4 +- src/Interpreters/ActionsVisitor.h | 2 +- src/Interpreters/ExpressionActions.cpp | 20 ++++++- src/Interpreters/ExpressionActions.h | 6 +- src/Interpreters/ExpressionAnalyzer.cpp | 27 ++++----- src/Interpreters/ExpressionAnalyzer.h | 2 +- src/Interpreters/InterpreterInsertQuery.cpp | 4 +- src/Interpreters/InterpreterSelectQuery.cpp | 14 ++--- src/Interpreters/MutationsInterpreter.cpp | 2 +- src/Interpreters/TableJoin.cpp | 4 +- src/Interpreters/WindowDescription.h | 6 +- src/Interpreters/addMissingDefaults.cpp | 2 +- src/Interpreters/addMissingDefaults.h | 2 +- src/Interpreters/inplaceBlockConversions.cpp | 2 +- src/Interpreters/inplaceBlockConversions.h | 2 +- src/Planner/CollectTableExpressionData.cpp | 6 +- src/Planner/Planner.cpp | 49 ++++++++-------- src/Planner/PlannerActionsVisitor.cpp | 6 +- src/Planner/PlannerJoinTree.cpp | 58 ++++++++++--------- src/Planner/PlannerJoinTree.h | 2 +- src/Planner/PlannerJoins.cpp | 12 ++-- src/Planner/PlannerWindowFunctions.cpp | 13 ++++- src/Planner/PlannerWindowFunctions.h | 2 +- src/Planner/Utils.cpp | 2 +- src/Processors/QueryPlan/AggregatingStep.cpp | 4 +- src/Processors/QueryPlan/CubeStep.cpp | 4 +- src/Processors/QueryPlan/ExpressionStep.cpp | 12 ++-- src/Processors/QueryPlan/ExpressionStep.h | 2 +- src/Processors/QueryPlan/FilterStep.cpp | 11 ++-- src/Processors/QueryPlan/FilterStep.h | 2 +- .../Optimizations/distinctReadInOrder.cpp | 12 ++-- .../Optimizations/filterPushDown.cpp | 2 +- .../Optimizations/optimizePrewhere.cpp | 2 +- .../optimizePrimaryKeyCondition.cpp | 6 +- .../Optimizations/optimizeReadInOrder.cpp | 4 +- .../optimizeUseAggregateProjection.cpp | 4 +- .../optimizeUseNormalProjection.cpp | 2 +- .../Optimizations/projectionsCommon.cpp | 4 +- .../Optimizations/projectionsCommon.h | 2 +- .../Optimizations/removeRedundantDistinct.cpp | 23 ++++---- .../QueryPlan/ReadFromMergeTree.cpp | 38 ++++++------ src/Processors/QueryPlan/ReadFromMergeTree.h | 2 +- .../QueryPlan/ReadFromSystemNumbersStep.cpp | 2 +- .../QueryPlan/SourceStepWithFilter.cpp | 8 +-- .../QueryPlan/SourceStepWithFilter.h | 3 +- src/Processors/QueryPlan/TotalsHavingStep.cpp | 8 +-- src/Processors/QueryPlan/TotalsHavingStep.h | 2 +- src/Processors/QueryPlan/WindowStep.h | 2 +- src/Processors/SourceWithKeyCondition.h | 4 +- .../Transforms/FillingTransform.cpp | 2 +- src/Storages/Hive/StorageHive.cpp | 8 +-- src/Storages/KeyDescription.cpp | 2 +- src/Storages/MergeTree/KeyCondition.cpp | 4 +- src/Storages/MergeTree/KeyCondition.h | 2 +- src/Storages/MergeTree/MergeTreeData.cpp | 8 +-- src/Storages/MergeTree/MergeTreeData.h | 2 +- .../MergeTree/MergeTreeDataSelectExecutor.cpp | 8 +-- .../MergeTree/MergeTreeDataSelectExecutor.h | 4 +- .../MergeTree/MergeTreeIndexAnnoy.cpp | 2 +- src/Storages/MergeTree/MergeTreeIndexAnnoy.h | 2 +- .../MergeTree/MergeTreeIndexBloomFilter.cpp | 4 +- .../MergeTree/MergeTreeIndexBloomFilter.h | 4 +- .../MergeTreeIndexBloomFilterText.cpp | 4 +- .../MergeTree/MergeTreeIndexBloomFilterText.h | 4 +- .../MergeTree/MergeTreeIndexFullText.cpp | 4 +- .../MergeTree/MergeTreeIndexFullText.h | 4 +- .../MergeTree/MergeTreeIndexHypothesis.cpp | 2 +- .../MergeTree/MergeTreeIndexHypothesis.h | 2 +- .../MergeTree/MergeTreeIndexMinMax.cpp | 6 +- src/Storages/MergeTree/MergeTreeIndexMinMax.h | 4 +- 
src/Storages/MergeTree/MergeTreeIndexSet.cpp | 6 +- src/Storages/MergeTree/MergeTreeIndexSet.h | 4 +- .../MergeTree/MergeTreeIndexUSearch.cpp | 2 +- .../MergeTree/MergeTreeIndexUSearch.h | 2 +- src/Storages/MergeTree/MergeTreeIndices.h | 2 +- .../MergeTree/MergeTreeSelectProcessor.cpp | 4 +- .../MergeTree/MergeTreeSequentialSource.cpp | 4 +- .../MergeTreeSplitPrewhereIntoReadSteps.cpp | 32 +++++----- src/Storages/MergeTree/PartitionPruner.cpp | 2 +- src/Storages/MergeTree/PartitionPruner.h | 2 +- .../StorageObjectStorageSource.cpp | 2 +- src/Storages/SelectQueryInfo.h | 4 +- src/Storages/StorageBuffer.cpp | 4 +- src/Storages/StorageDistributed.cpp | 2 +- src/Storages/StorageFile.cpp | 2 +- src/Storages/StorageMerge.cpp | 10 ++-- src/Storages/StorageURL.h | 2 +- src/Storages/StorageValues.cpp | 4 +- src/Storages/StorageView.cpp | 2 +- src/Storages/TTLDescription.cpp | 2 +- src/Storages/VirtualColumnUtils.cpp | 4 +- src/Storages/VirtualColumnUtils.h | 2 +- src/Storages/WindowView/StorageWindowView.cpp | 12 ++-- 98 files changed, 355 insertions(+), 318 deletions(-) diff --git a/src/Core/InterpolateDescription.cpp b/src/Core/InterpolateDescription.cpp index d828c2e85e9..76bbefdcfd7 100644 --- a/src/Core/InterpolateDescription.cpp +++ b/src/Core/InterpolateDescription.cpp @@ -14,7 +14,7 @@ namespace DB { InterpolateDescription::InterpolateDescription(ActionsDAGPtr actions_, const Aliases & aliases) - : actions(actions_) + : actions(std::move(actions_)) { for (const auto & name_type : actions->getRequiredColumns()) { diff --git a/src/Core/InterpolateDescription.h b/src/Core/InterpolateDescription.h index 62d7120508b..73579aebee4 100644 --- a/src/Core/InterpolateDescription.h +++ b/src/Core/InterpolateDescription.h @@ -11,7 +11,7 @@ namespace DB { class ActionsDAG; -using ActionsDAGPtr = std::shared_ptr; +using ActionsDAGPtr = std::unique_ptr; using Aliases = std::unordered_map; /// Interpolate description diff --git a/src/Functions/indexHint.h b/src/Functions/indexHint.h index 3b71c7a5585..8fd7b751760 100644 --- a/src/Functions/indexHint.h +++ b/src/Functions/indexHint.h @@ -2,14 +2,12 @@ #include #include #include +#include namespace DB { -class ActionsDAG; -using ActionsDAGPtr = std::shared_ptr; - /** The `indexHint` function takes any number of any arguments and always returns one. 
* * This function has a special meaning (see ExpressionAnalyzer, KeyCondition) diff --git a/src/Interpreters/ActionsDAG.cpp b/src/Interpreters/ActionsDAG.cpp index 34f3e0a98bd..23e1e5ee152 100644 --- a/src/Interpreters/ActionsDAG.cpp +++ b/src/Interpreters/ActionsDAG.cpp @@ -626,7 +626,7 @@ void ActionsDAG::removeAliasesForFilter(const std::string & filter_name) ActionsDAGPtr ActionsDAG::cloneSubDAG(const NodeRawConstPtrs & outputs, bool remove_aliases) { - auto actions = std::make_shared(); + auto actions = std::make_unique(); std::unordered_map copy_map; struct Frame @@ -1248,25 +1248,29 @@ bool ActionsDAG::removeUnusedResult(const std::string & column_name) ActionsDAGPtr ActionsDAG::clone() const { - auto actions = std::make_shared(); + std::unordered_map old_to_new_nodes; + return clone(old_to_new_nodes); +} - std::unordered_map copy_map; +ActionsDAGPtr ActionsDAG::clone(std::unordered_map & old_to_new_nodes) const +{ + auto actions = std::make_unique(); for (const auto & node : nodes) { auto & copy_node = actions->nodes.emplace_back(node); - copy_map[&node] = ©_node; + old_to_new_nodes[&node] = ©_node; } for (auto & node : actions->nodes) for (auto & child : node.children) - child = copy_map[child]; + child = old_to_new_nodes[child]; for (const auto & output_node : outputs) - actions->outputs.push_back(copy_map[output_node]); + actions->outputs.push_back(old_to_new_nodes[output_node]); for (const auto & input_node : inputs) - actions->inputs.push_back(copy_map[input_node]); + actions->inputs.push_back(old_to_new_nodes[input_node]); return actions; } @@ -1421,7 +1425,7 @@ ActionsDAGPtr ActionsDAG::makeConvertingActions( if (add_casted_columns && mode != MatchColumnsMode::Name) throw Exception(ErrorCodes::LOGICAL_ERROR, "Converting with add_casted_columns supported only for MatchColumnsMode::Name"); - auto actions_dag = std::make_shared(source); + auto actions_dag = std::make_unique(source); NodeRawConstPtrs projection(num_result_columns); FunctionOverloadResolverPtr func_builder_materialize = std::make_unique(std::make_shared()); @@ -1549,7 +1553,7 @@ ActionsDAGPtr ActionsDAG::makeConvertingActions( ActionsDAGPtr ActionsDAG::makeAddingColumnActions(ColumnWithTypeAndName column) { - auto adding_column_action = std::make_shared(); + auto adding_column_action = std::make_unique(); FunctionOverloadResolverPtr func_builder_materialize = std::make_unique(std::make_shared()); @@ -1570,7 +1574,7 @@ ActionsDAGPtr ActionsDAG::merge(ActionsDAG && first, ActionsDAG && second) /// Some actions could become unused. Do not drop inputs to preserve the header. 
first.removeUnusedActions(false); - return std::make_shared(std::move(first)); + return std::make_unique(std::move(first)); } void ActionsDAG::mergeInplace(ActionsDAG && second) @@ -1963,12 +1967,12 @@ ActionsDAG::SplitResult ActionsDAG::split(std::unordered_set split second_inputs.push_back(cur.to_second); } - auto first_actions = std::make_shared(); + auto first_actions = std::make_unique(); first_actions->nodes.swap(first_nodes); first_actions->outputs.swap(first_outputs); first_actions->inputs.swap(first_inputs); - auto second_actions = std::make_shared(); + auto second_actions = std::make_unique(); second_actions->nodes.swap(second_nodes); second_actions->outputs.swap(second_outputs); second_actions->inputs.swap(second_inputs); @@ -2302,7 +2306,7 @@ ActionsDAGPtr ActionsDAG::createActionsForConjunction(NodeRawConstPtrs conjuncti if (conjunction.empty()) return nullptr; - auto actions = std::make_shared(); + auto actions = std::make_unique(); FunctionOverloadResolverPtr func_builder_and = std::make_unique(std::make_shared()); @@ -2866,7 +2870,7 @@ ActionsDAGPtr ActionsDAG::buildFilterActionsDAG( bool visited_children = false; }; - auto result_dag = std::make_shared(); + auto result_dag = std::make_unique(); std::unordered_map result_inputs; std::unordered_map node_to_result_node; @@ -2964,7 +2968,7 @@ ActionsDAGPtr ActionsDAG::buildFilterActionsDAG( const auto & index_hint_args = index_hint->getActions()->getOutputs(); if (index_hint_args.empty()) - index_hint_filter_dag = std::make_shared(); + index_hint_filter_dag = std::make_unique(); else index_hint_filter_dag = buildFilterActionsDAG(index_hint_args, node_name_to_input_node_column, @@ -3108,10 +3112,10 @@ ActionsDAG::NodeRawConstPtrs ActionsDAG::filterNodesByAllowedInputs( return nodes; } -FindOriginalNodeForOutputName::FindOriginalNodeForOutputName(const ActionsDAGPtr & actions_) - :actions(actions_) +FindOriginalNodeForOutputName::FindOriginalNodeForOutputName(const ActionsDAG & actions_) + //: actions(actions_) { - const auto & actions_outputs = actions->getOutputs(); + const auto & actions_outputs = actions_.getOutputs(); for (const auto * output_node : actions_outputs) { /// find input node which refers to the output node @@ -3147,10 +3151,10 @@ const ActionsDAG::Node * FindOriginalNodeForOutputName::find(const String & outp return it->second; } -FindAliasForInputName::FindAliasForInputName(const ActionsDAGPtr & actions_) - :actions(actions_) +FindAliasForInputName::FindAliasForInputName(const ActionsDAG & actions_) + //: actions(actions_) { - const auto & actions_outputs = actions->getOutputs(); + const auto & actions_outputs = actions_.getOutputs(); for (const auto * output_node : actions_outputs) { /// find input node which corresponds to alias diff --git a/src/Interpreters/ActionsDAG.h b/src/Interpreters/ActionsDAG.h index c9974fd849c..4a840885b6a 100644 --- a/src/Interpreters/ActionsDAG.h +++ b/src/Interpreters/ActionsDAG.h @@ -12,7 +12,7 @@ namespace DB { class ActionsDAG; -using ActionsDAGPtr = std::shared_ptr; +using ActionsDAGPtr = std::unique_ptr; class IExecutableFunction; using ExecutableFunctionPtr = std::shared_ptr; @@ -262,6 +262,7 @@ public: #endif ActionsDAGPtr clone() const; + ActionsDAGPtr clone(std::unordered_map & old_to_new_nodes) const; static ActionsDAGPtr cloneSubDAG(const NodeRawConstPtrs & outputs, bool remove_aliases); @@ -480,11 +481,11 @@ class FindOriginalNodeForOutputName using NameToNodeIndex = std::unordered_map; public: - explicit FindOriginalNodeForOutputName(const ActionsDAGPtr & actions); 
+ explicit FindOriginalNodeForOutputName(const ActionsDAG & actions); const ActionsDAG::Node * find(const String & output_name); private: - ActionsDAGPtr actions; + //const ActionsDAG & actions; NameToNodeIndex index; }; @@ -493,11 +494,11 @@ class FindAliasForInputName using NameToNodeIndex = std::unordered_map; public: - explicit FindAliasForInputName(const ActionsDAGPtr & actions); + explicit FindAliasForInputName(const ActionsDAG & actions); const ActionsDAG::Node * find(const String & name); private: - ActionsDAGPtr actions; + //const ActionsDAG & actions; NameToNodeIndex index; }; diff --git a/src/Interpreters/ActionsVisitor.cpp b/src/Interpreters/ActionsVisitor.cpp index 9e56d740e5e..1838a7b04b9 100644 --- a/src/Interpreters/ActionsVisitor.cpp +++ b/src/Interpreters/ActionsVisitor.cpp @@ -1009,7 +1009,7 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data & dag.project(args); auto index_hint = std::make_shared(); - index_hint->setActions(std::make_shared(std::move(dag))); + index_hint->setActions(std::make_unique(std::move(dag))); // Arguments are removed. We add function instead of constant column to avoid constant folding. data.addFunction(std::make_unique(index_hint), {}, column_name); @@ -1272,7 +1272,7 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data & lambda_dag.removeUnusedActions(Names(1, result_name)); auto lambda_actions = std::make_shared( - std::make_shared(std::move(lambda_dag)), + std::make_unique(std::move(lambda_dag)), ExpressionActionsSettings::fromContext(data.getContext(), CompileExpressions::yes)); DataTypePtr result_type = lambda_actions->getSampleBlock().getByName(result_name).type; diff --git a/src/Interpreters/ActionsVisitor.h b/src/Interpreters/ActionsVisitor.h index 46d2d60e461..496d9b9b587 100644 --- a/src/Interpreters/ActionsVisitor.h +++ b/src/Interpreters/ActionsVisitor.h @@ -22,7 +22,7 @@ class ExpressionActions; using ExpressionActionsPtr = std::shared_ptr; class ActionsDAG; -using ActionsDAGPtr = std::shared_ptr; +using ActionsDAGPtr = std::unique_ptr; class IFunctionOverloadResolver; using FunctionOverloadResolverPtr = std::shared_ptr; diff --git a/src/Interpreters/ExpressionActions.cpp b/src/Interpreters/ExpressionActions.cpp index 7f96c927d82..7cbf5afd763 100644 --- a/src/Interpreters/ExpressionActions.cpp +++ b/src/Interpreters/ExpressionActions.cpp @@ -73,7 +73,25 @@ ExpressionActions::ExpressionActions(ActionsDAGPtr actions_dag_, const Expressio ExpressionActionsPtr ExpressionActions::clone() const { - return std::make_shared(*this); + auto copy = std::make_shared(ExpressionActions()); + + std::unordered_map copy_map; + copy->actions_dag = actions_dag->clone(copy_map); + copy->actions = actions; + for (auto & action : copy->actions) + action.node = copy_map[action.node]; + + copy->num_columns = num_columns; + + copy->required_columns = required_columns; + copy->input_positions = input_positions; + copy->result_positions = result_positions; + copy->sample_block = sample_block; + + copy->project_inputs = project_inputs; + copy->settings = settings; + + return copy; } namespace diff --git a/src/Interpreters/ExpressionActions.h b/src/Interpreters/ExpressionActions.h index ddffe022215..63ea989bd5e 100644 --- a/src/Interpreters/ExpressionActions.h +++ b/src/Interpreters/ExpressionActions.h @@ -84,10 +84,9 @@ private: ExpressionActionsSettings settings; public: - ExpressionActions() = delete; explicit ExpressionActions(ActionsDAGPtr actions_dag_, const ExpressionActionsSettings & settings_ = 
{}, bool project_inputs_ = false); - ExpressionActions(const ExpressionActions &) = default; - ExpressionActions & operator=(const ExpressionActions &) = default; + ExpressionActions(ExpressionActions &&) = default; + ExpressionActions & operator=(ExpressionActions &&) = default; const Actions & getActions() const { return actions; } const std::list & getNodes() const { return actions_dag->getNodes(); } @@ -131,6 +130,7 @@ public: ExpressionActionsPtr clone() const; private: + ExpressionActions() = default; void checkLimits(const ColumnsWithTypeAndName & columns) const; void linearizeActions(const std::unordered_set & lazy_executed_nodes); diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index 62cddd9caf7..be00e37c751 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -658,7 +658,7 @@ void ExpressionAnalyzer::makeWindowDescriptionFromAST(const Context & context_, with_alias->getColumnName(), 1 /* direction */, 1 /* nulls_direction */)); - auto actions_dag = std::make_shared(aggregated_columns); + auto actions_dag = std::make_unique(aggregated_columns); getRootActions(column_ast, false, *actions_dag); desc.partition_by_actions.push_back(std::move(actions_dag)); } @@ -679,7 +679,7 @@ void ExpressionAnalyzer::makeWindowDescriptionFromAST(const Context & context_, order_by_element.direction, order_by_element.nulls_direction)); - auto actions_dag = std::make_shared(aggregated_columns); + auto actions_dag = std::make_unique(aggregated_columns); getRootActions(column_ast, false, *actions_dag); desc.order_by_actions.push_back(std::move(actions_dag)); } @@ -823,13 +823,14 @@ void ExpressionAnalyzer::makeWindowDescriptions(ActionsDAG & actions) makeWindowDescriptionFromAST(*current_context, window_descriptions, desc, &definition); + auto full_sort_description = desc.full_sort_description; + auto [it, inserted] = window_descriptions.insert( - {default_window_name, desc}); + {default_window_name, std::move(desc)}); if (!inserted) { - assert(it->second.full_sort_description - == desc.full_sort_description); + assert(it->second.full_sort_description == full_sort_description); } it->second.window_functions.push_back(window_function); @@ -1353,10 +1354,10 @@ bool SelectQueryExpressionAnalyzer::appendGroupBy(ExpressionActionsChain & chain { for (auto & child : asts) { - auto actions_dag = std::make_shared(columns_after_join); + auto actions_dag = std::make_unique(columns_after_join); getRootActions(child, only_types, *actions_dag); group_by_elements_actions.emplace_back( - std::make_shared(actions_dag, ExpressionActionsSettings::fromContext(getContext(), CompileExpressions::yes))); + std::make_shared(std::move(actions_dag), ExpressionActionsSettings::fromContext(getContext(), CompileExpressions::yes))); } } @@ -1606,10 +1607,10 @@ ActionsAndProjectInputsFlagPtr SelectQueryExpressionAnalyzer::appendOrderBy(Expr { for (const auto & child : select_query->orderBy()->children) { - auto actions_dag = std::make_shared(columns_after_join); + auto actions_dag = std::make_unique(columns_after_join); getRootActions(child, only_types, *actions_dag); order_by_elements_actions.emplace_back( - std::make_shared(actions_dag, ExpressionActionsSettings::fromContext(getContext(), CompileExpressions::yes))); + std::make_shared(std::move(actions_dag), ExpressionActionsSettings::fromContext(getContext(), CompileExpressions::yes))); } } @@ -1799,7 +1800,7 @@ ExpressionActionsPtr ExpressionAnalyzer::getActions(bool add_aliases, bool 
remov ActionsDAGPtr ExpressionAnalyzer::getConstActionsDAG(const ColumnsWithTypeAndName & constant_inputs) { - auto actions = std::make_shared(constant_inputs); + auto actions = std::make_unique(constant_inputs); getRootActions(query, true /* no_makeset_for_subqueries */, *actions, true /* only_consts */); return actions; } @@ -1807,7 +1808,7 @@ ActionsDAGPtr ExpressionAnalyzer::getConstActionsDAG(const ColumnsWithTypeAndNam ExpressionActionsPtr ExpressionAnalyzer::getConstActions(const ColumnsWithTypeAndName & constant_inputs) { auto actions = getConstActionsDAG(constant_inputs); - return std::make_shared(actions, ExpressionActionsSettings::fromContext(getContext())); + return std::make_shared(std::move(actions), ExpressionActionsSettings::fromContext(getContext())); } std::unique_ptr SelectQueryExpressionAnalyzer::getJoinedPlan() @@ -1878,7 +1879,7 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( if (prewhere_dag_and_flags) { - auto dag = std::make_shared(std::move(prewhere_dag_and_flags->dag)); + auto dag = std::make_unique(std::move(prewhere_dag_and_flags->dag)); prewhere_info = std::make_shared(std::move(dag), query.prewhere()->getColumnName()); prewhere_dag_and_flags.reset(); } @@ -1945,7 +1946,7 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( { auto dag = prewhere_dag_and_flags->dag.clone(); ExpressionActions( - dag, + std::move(dag), ExpressionActionsSettings::fromSettings(context->getSettingsRef())).execute(before_prewhere_sample); auto & column_elem = before_prewhere_sample.getByName(query.prewhere()->getColumnName()); /// If the filter column is a constant, record it. diff --git a/src/Interpreters/ExpressionAnalyzer.h b/src/Interpreters/ExpressionAnalyzer.h index 12d6dce8f72..e44a5891e77 100644 --- a/src/Interpreters/ExpressionAnalyzer.h +++ b/src/Interpreters/ExpressionAnalyzer.h @@ -39,7 +39,7 @@ class ArrayJoinAction; using ArrayJoinActionPtr = std::shared_ptr; class ActionsDAG; -using ActionsDAGPtr = std::shared_ptr; +using ActionsDAGPtr = std::unique_ptr; /// Create columns in block or return false if not possible bool sanitizeBlock(Block & block, bool throw_if_cannot_create_column = false); diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index 927bafe4bfb..4dbdebd0d06 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -384,7 +384,7 @@ Chain InterpreterInsertQuery::buildPreSinkChain( context_ptr, null_as_default); - auto adding_missing_defaults_actions = std::make_shared(adding_missing_defaults_dag); + auto adding_missing_defaults_actions = std::make_shared(std::move(adding_missing_defaults_dag)); /// Actually we don't know structure of input blocks from query/table, /// because some clients break insertion protocol (columns != header) @@ -597,7 +597,7 @@ BlockIO InterpreterInsertQuery::execute() pipeline.getHeader().getColumnsWithTypeAndName(), header.getColumnsWithTypeAndName(), ActionsDAG::MatchColumnsMode::Position); - auto actions = std::make_shared(actions_dag, ExpressionActionsSettings::fromContext(getContext(), CompileExpressions::yes)); + auto actions = std::make_shared(std::move(actions_dag), ExpressionActionsSettings::fromContext(getContext(), CompileExpressions::yes)); pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr { diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index f5b54ec64cb..64a17a7ba87 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ 
b/src/Interpreters/InterpreterSelectQuery.cpp @@ -1302,7 +1302,7 @@ static InterpolateDescriptionPtr getInterpolateDescription( result_columns, ActionsDAG::MatchColumnsMode::Position, true); ActionsDAGPtr merge_dag = ActionsDAG::merge(std::move(*actions->clone()), std::move(*conv_dag)); - interpolate_descr = std::make_shared(merge_dag, aliases); + interpolate_descr = std::make_shared(std::move(merge_dag), aliases); } return interpolate_descr; @@ -2042,7 +2042,7 @@ void InterpreterSelectQuery::addEmptySourceToQueryPlan(QueryPlan & query_plan, c pipe.addSimpleTransform([&](const Block & header) { return std::make_shared(header, - std::make_shared(prewhere_info.row_level_filter), + std::make_shared(prewhere_info.row_level_filter->clone()), prewhere_info.row_level_column_name, true); }); } @@ -2050,7 +2050,7 @@ void InterpreterSelectQuery::addEmptySourceToQueryPlan(QueryPlan & query_plan, c pipe.addSimpleTransform([&](const Block & header) { return std::make_shared( - header, std::make_shared(prewhere_info.prewhere_actions), + header, std::make_shared(prewhere_info.prewhere_actions->clone()), prewhere_info.prewhere_column_name, prewhere_info.remove_prewhere_column); }); } @@ -2094,8 +2094,8 @@ void InterpreterSelectQuery::applyFiltersToPrewhereInAnalysis(ExpressionAnalysis if (does_storage_support_prewhere && shouldMoveToPrewhere()) { /// Execute row level filter in prewhere as a part of "move to prewhere" optimization. - analysis.prewhere_info = std::make_shared(analysis.filter_info->actions, analysis.filter_info->column_name); - analysis.prewhere_info->remove_prewhere_column = analysis.filter_info->do_remove_column; + analysis.prewhere_info = std::make_shared(std::move(analysis.filter_info->actions), analysis.filter_info->column_name); + analysis.prewhere_info->remove_prewhere_column = std::move(analysis.filter_info->do_remove_column); analysis.prewhere_info->need_filter = true; analysis.filter_info = nullptr; } @@ -2103,8 +2103,8 @@ void InterpreterSelectQuery::applyFiltersToPrewhereInAnalysis(ExpressionAnalysis else { /// Add row level security actions to prewhere. 
- analysis.prewhere_info->row_level_filter = analysis.filter_info->actions; - analysis.prewhere_info->row_level_column_name = analysis.filter_info->column_name; + analysis.prewhere_info->row_level_filter = std::move(analysis.filter_info->actions); + analysis.prewhere_info->row_level_column_name = std::move(analysis.filter_info->column_name); analysis.filter_info = nullptr; } } diff --git a/src/Interpreters/MutationsInterpreter.cpp b/src/Interpreters/MutationsInterpreter.cpp index 6d3a4f30b34..1bb770bf561 100644 --- a/src/Interpreters/MutationsInterpreter.cpp +++ b/src/Interpreters/MutationsInterpreter.cpp @@ -1211,7 +1211,7 @@ void MutationsInterpreter::Source::read( MergeTreeSequentialSourceType::Mutation, plan, *data, storage_snapshot, part, required_columns, - apply_deleted_mask_, filter, context_, + apply_deleted_mask_, std::move(filter), context_, getLogger("MutationsInterpreter")); } else diff --git a/src/Interpreters/TableJoin.cpp b/src/Interpreters/TableJoin.cpp index 6191eb73fd4..baf3a743f40 100644 --- a/src/Interpreters/TableJoin.cpp +++ b/src/Interpreters/TableJoin.cpp @@ -470,7 +470,7 @@ static ActionsDAGPtr createWrapWithTupleActions( if (column_names_to_wrap.empty()) return nullptr; - auto actions_dag = std::make_shared(source_columns); + auto actions_dag = std::make_unique(source_columns); FunctionOverloadResolverPtr func_builder = std::make_unique(std::make_shared()); @@ -616,7 +616,7 @@ TableJoin::createConvertingActions( mergeDags(right_dag, std::move(new_right_dag)); } - return {left_dag, right_dag}; + return {std::move(left_dag), std::move(right_dag)}; } template diff --git a/src/Interpreters/WindowDescription.h b/src/Interpreters/WindowDescription.h index c26e4517c9a..17bfe619c30 100644 --- a/src/Interpreters/WindowDescription.h +++ b/src/Interpreters/WindowDescription.h @@ -14,7 +14,7 @@ namespace DB class ASTFunction; class ActionsDAG; -using ActionsDAGPtr = std::shared_ptr; +using ActionsDAGPtr = std::unique_ptr; struct WindowFunctionDescription { @@ -93,8 +93,8 @@ struct WindowDescription // then by ORDER BY. This field holds this combined sort order. SortDescription full_sort_description; - std::vector partition_by_actions; - std::vector order_by_actions; + std::vector> partition_by_actions; + std::vector> order_by_actions; WindowFrame frame; diff --git a/src/Interpreters/addMissingDefaults.cpp b/src/Interpreters/addMissingDefaults.cpp index fbf17d7efb7..929999c8c37 100644 --- a/src/Interpreters/addMissingDefaults.cpp +++ b/src/Interpreters/addMissingDefaults.cpp @@ -21,7 +21,7 @@ ActionsDAGPtr addMissingDefaults( ContextPtr context, bool null_as_default) { - auto actions = std::make_shared(header.getColumnsWithTypeAndName()); + auto actions = std::make_unique(header.getColumnsWithTypeAndName()); auto & index = actions->getOutputs(); /// For missing columns of nested structure, you need to create not a column of empty arrays, but a column of arrays of correct lengths. diff --git a/src/Interpreters/addMissingDefaults.h b/src/Interpreters/addMissingDefaults.h index 0a3d4de478c..94afd806dfd 100644 --- a/src/Interpreters/addMissingDefaults.h +++ b/src/Interpreters/addMissingDefaults.h @@ -15,7 +15,7 @@ class NamesAndTypesList; class ColumnsDescription; class ActionsDAG; -using ActionsDAGPtr = std::shared_ptr; +using ActionsDAGPtr = std::unique_ptr; /** Adds three types of columns into block * 1. 
Columns, that are missed inside request, but present in table without defaults (missed columns) diff --git a/src/Interpreters/inplaceBlockConversions.cpp b/src/Interpreters/inplaceBlockConversions.cpp index 239cce5b427..b000264ae33 100644 --- a/src/Interpreters/inplaceBlockConversions.cpp +++ b/src/Interpreters/inplaceBlockConversions.cpp @@ -163,7 +163,7 @@ ActionsDAGPtr createExpressions( auto syntax_result = TreeRewriter(context).analyze(expr_list, header.getNamesAndTypesList()); auto expression_analyzer = ExpressionAnalyzer{expr_list, syntax_result, context}; - auto dag = std::make_shared(header.getNamesAndTypesList()); + auto dag = std::make_unique(header.getNamesAndTypesList()); auto actions = expression_analyzer.getActionsDAG(true, !save_unneeded_columns); dag = ActionsDAG::merge(std::move(*dag), std::move(*actions)); diff --git a/src/Interpreters/inplaceBlockConversions.h b/src/Interpreters/inplaceBlockConversions.h index bea44bf6db9..ffc77561e79 100644 --- a/src/Interpreters/inplaceBlockConversions.h +++ b/src/Interpreters/inplaceBlockConversions.h @@ -24,7 +24,7 @@ struct StorageInMemoryMetadata; using StorageMetadataPtr = std::shared_ptr; class ActionsDAG; -using ActionsDAGPtr = std::shared_ptr; +using ActionsDAGPtr = std::unique_ptr; /// Create actions which adds missing defaults to block according to required_columns using columns description /// or substitute NULL into DEFAULT value in case of INSERT SELECT query (null_as_default) if according setting is 1. diff --git a/src/Planner/CollectTableExpressionData.cpp b/src/Planner/CollectTableExpressionData.cpp index d5e39a9f123..162d3fe8d11 100644 --- a/src/Planner/CollectTableExpressionData.cpp +++ b/src/Planner/CollectTableExpressionData.cpp @@ -88,7 +88,7 @@ public: auto column_identifier = planner_context->getGlobalPlannerContext()->createColumnIdentifier(node); - ActionsDAGPtr alias_column_actions_dag = std::make_shared(); + ActionsDAGPtr alias_column_actions_dag = std::make_unique(); PlannerActionsVisitor actions_visitor(planner_context, false); auto outputs = actions_visitor.visit(*alias_column_actions_dag, column_node->getExpression()); if (outputs.size() != 1) @@ -97,7 +97,7 @@ public: const auto & column_name = column_node->getColumnName(); const auto & alias_node = alias_column_actions_dag->addAlias(*outputs[0], column_name); alias_column_actions_dag->addOrReplaceInOutputs(alias_node); - table_expression_data.addAliasColumn(column_node->getColumn(), column_identifier, alias_column_actions_dag, select_added_columns); + table_expression_data.addAliasColumn(column_node->getColumn(), column_identifier, std::move(alias_column_actions_dag), select_added_columns); } return; @@ -335,7 +335,7 @@ void collectTableExpressionData(QueryTreeNodePtr & query_node, PlannerContextPtr collect_source_columns_visitor.setKeepAliasColumns(false); collect_source_columns_visitor.visit(query_node_typed.getPrewhere()); - auto prewhere_actions_dag = std::make_shared(); + auto prewhere_actions_dag = std::make_unique(); QueryTreeNodePtr query_tree_node = query_node_typed.getPrewhere(); diff --git a/src/Planner/Planner.cpp b/src/Planner/Planner.cpp index 2d42ed73223..681ae7e6ac4 100644 --- a/src/Planner/Planner.cpp +++ b/src/Planner/Planner.cpp @@ -213,7 +213,7 @@ FiltersForTableExpressionMap collectFiltersForAnalysis(const QueryTreeNodePtr & if (!read_from_dummy) continue; - auto filter_actions = read_from_dummy->getFilterActionsDAG(); + auto filter_actions = read_from_dummy->detachFilterActionsDAG(); const auto & table_node = 
dummy_storage_to_table.at(&read_from_dummy->getStorage()); res[table_node] = FiltersForTableExpression{std::move(filter_actions), read_from_dummy->getPrewhereInfo()}; } @@ -331,13 +331,13 @@ public: void addExpressionStep(QueryPlan & query_plan, const ActionsAndProjectInputsFlagPtr & expression_actions, const std::string & step_description, - std::vector & result_actions_to_execute) + std::vector & result_actions_to_execute) { auto actions = expression_actions->dag.clone(); if (expression_actions->project_input) actions->appendInputsForUnusedColumns(query_plan.getCurrentDataStream().header); - result_actions_to_execute.push_back(actions); + result_actions_to_execute.push_back(actions.get()); auto expression_step = std::make_unique(query_plan.getCurrentDataStream(), actions); expression_step->setStepDescription(step_description); query_plan.addStep(std::move(expression_step)); @@ -346,13 +346,13 @@ void addExpressionStep(QueryPlan & query_plan, void addFilterStep(QueryPlan & query_plan, const FilterAnalysisResult & filter_analysis_result, const std::string & step_description, - std::vector & result_actions_to_execute) + std::vector & result_actions_to_execute) { auto actions = filter_analysis_result.filter_actions->dag.clone(); if (filter_analysis_result.filter_actions->project_input) actions->appendInputsForUnusedColumns(query_plan.getCurrentDataStream().header); - result_actions_to_execute.push_back(actions); + result_actions_to_execute.push_back(actions.get()); auto where_step = std::make_unique(query_plan.getCurrentDataStream(), actions, filter_analysis_result.filter_column_name, @@ -544,7 +544,7 @@ void addTotalsHavingStep(QueryPlan & query_plan, const QueryAnalysisResult & query_analysis_result, const PlannerContextPtr & planner_context, const QueryNode & query_node, - std::vector & result_actions_to_execute) + std::vector & result_actions_to_execute) { const auto & query_context = planner_context->getQueryContext(); const auto & settings = query_context->getSettingsRef(); @@ -560,7 +560,7 @@ void addTotalsHavingStep(QueryPlan & query_plan, if (having_analysis_result.filter_actions->project_input) actions->appendInputsForUnusedColumns(query_plan.getCurrentDataStream().header); - result_actions_to_execute.push_back(actions); + result_actions_to_execute.push_back(actions.get()); } auto totals_having_step = std::make_unique( @@ -714,7 +714,7 @@ void addWithFillStepIfNeeded(QueryPlan & query_plan, if (query_node.hasInterpolate()) { - auto interpolate_actions_dag = std::make_shared(); + auto interpolate_actions_dag = std::make_unique(); auto query_plan_columns = query_plan.getCurrentDataStream().header.getColumnsWithTypeAndName(); for (auto & query_plan_column : query_plan_columns) { @@ -885,7 +885,7 @@ void addPreliminarySortOrDistinctOrLimitStepsIfNeeded(QueryPlan & query_plan, const PlannerContextPtr & planner_context, const PlannerQueryProcessingInfo & query_processing_info, const QueryTreeNodePtr & query_tree, - std::vector & result_actions_to_execute) + std::vector & result_actions_to_execute) { const auto & query_node = query_tree->as(); @@ -932,14 +932,14 @@ void addWindowSteps(QueryPlan & query_plan, const auto & query_context = planner_context->getQueryContext(); const auto & settings = query_context->getSettingsRef(); - auto window_descriptions = window_analysis_result.window_descriptions; - sortWindowDescriptions(window_descriptions); + const auto & window_descriptions = window_analysis_result.window_descriptions; + auto perm = sortWindowDescriptions(window_descriptions); 
size_t window_descriptions_size = window_descriptions.size(); for (size_t i = 0; i < window_descriptions_size; ++i) { - const auto & window_description = window_descriptions[i]; + const auto & window_description = window_descriptions[perm[i]]; /** We don't need to sort again if the input from previous window already * has suitable sorting. Also don't create sort steps when there are no @@ -952,8 +952,9 @@ void addWindowSteps(QueryPlan & query_plan, bool need_sort = !window_description.full_sort_description.empty(); if (need_sort && i != 0) { - need_sort = !sortDescriptionIsPrefix(window_description.full_sort_description, window_descriptions[i - 1].full_sort_description) - || (settings.max_threads != 1 && window_description.partition_by.size() != window_descriptions[i - 1].partition_by.size()); + auto prev = perm[i - 1]; + need_sort = !sortDescriptionIsPrefix(window_description.full_sort_description, window_descriptions[prev].full_sort_description) + || (settings.max_threads != 1 && window_description.partition_by.size() != window_descriptions[prev].partition_by.size()); } if (need_sort) { @@ -1054,9 +1055,9 @@ void addOffsetStep(QueryPlan & query_plan, const QueryAnalysisResult & query_ana } } -void collectSetsFromActionsDAG(const ActionsDAGPtr & dag, std::unordered_set & useful_sets) +void collectSetsFromActionsDAG(const ActionsDAG & dag, std::unordered_set & useful_sets) { - for (const auto & node : dag->getNodes()) + for (const auto & node : dag.getNodes()) { if (node.column) { @@ -1075,7 +1076,7 @@ void collectSetsFromActionsDAG(const ActionsDAGPtr & dag, std::unordered_set(adaptor->getFunction().get())) { - collectSetsFromActionsDAG(index_hint->getActions(), useful_sets); + collectSetsFromActionsDAG(*index_hint->getActions(), useful_sets); } } } @@ -1086,13 +1087,13 @@ void addBuildSubqueriesForSetsStepIfNeeded( QueryPlan & query_plan, const SelectQueryOptions & select_query_options, const PlannerContextPtr & planner_context, - const std::vector & result_actions_to_execute) + const std::vector & result_actions_to_execute) { auto subqueries = planner_context->getPreparedSets().getSubqueries(); std::unordered_set useful_sets; - for (const auto & actions_to_execute : result_actions_to_execute) - collectSetsFromActionsDAG(actions_to_execute, useful_sets); + for (const auto * actions_to_execute : result_actions_to_execute) + collectSetsFromActionsDAG(*actions_to_execute, useful_sets); auto predicate = [&useful_sets](const auto & set) { return !useful_sets.contains(set.get()); }; auto it = std::remove_if(subqueries.begin(), subqueries.end(), std::move(predicate)); @@ -1448,7 +1449,7 @@ void Planner::buildPlanForQueryNode() if (it != table_filters.end()) { const auto & filters = it->second; - table_expression_data.setFilterActions(filters.filter_actions); + table_expression_data.setFilterActions(filters.filter_actions->clone()); table_expression_data.setPrewhereInfo(filters.prewhere_info); } } @@ -1539,15 +1540,15 @@ void Planner::buildPlanForQueryNode() planner_context, query_processing_info); - std::vector result_actions_to_execute = std::move(join_tree_query_plan.actions_dags); + std::vector result_actions_to_execute = std::move(join_tree_query_plan.actions_dags); for (auto & [_, table_expression_data] : planner_context->getTableExpressionNodeToData()) { if (table_expression_data.getPrewhereFilterActions()) - result_actions_to_execute.push_back(table_expression_data.getPrewhereFilterActions()); + result_actions_to_execute.push_back(table_expression_data.getPrewhereFilterActions().get()); 
if (table_expression_data.getRowLevelFilterActions()) - result_actions_to_execute.push_back(table_expression_data.getRowLevelFilterActions()); + result_actions_to_execute.push_back(table_expression_data.getRowLevelFilterActions().get()); } if (query_processing_info.isIntermediateStage()) diff --git a/src/Planner/PlannerActionsVisitor.cpp b/src/Planner/PlannerActionsVisitor.cpp index 7a12d5d690d..59ec7778e21 100644 --- a/src/Planner/PlannerActionsVisitor.cpp +++ b/src/Planner/PlannerActionsVisitor.cpp @@ -757,7 +757,7 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::vi lambda_arguments_names_and_types.emplace_back(lambda_argument_name, std::move(lambda_argument_type)); } - auto lambda_actions_dag = std::make_shared(); + auto lambda_actions_dag = std::make_unique(); actions_stack.emplace_back(*lambda_actions_dag, node); auto [lambda_expression_node_name, levels] = visitImpl(lambda_node.getExpression()); @@ -765,7 +765,7 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::vi lambda_actions_dag->removeUnusedActions(Names(1, lambda_expression_node_name)); auto expression_actions_settings = ExpressionActionsSettings::fromContext(planner_context->getQueryContext(), CompileExpressions::yes); - auto lambda_actions = std::make_shared(lambda_actions_dag, expression_actions_settings); + auto lambda_actions = std::make_shared(std::move(lambda_actions_dag), expression_actions_settings); Names captured_column_names; ActionsDAG::NodeRawConstPtrs lambda_children; @@ -879,7 +879,7 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::vi const auto & function_node = node->as(); auto function_node_name = action_node_name_helper.calculateActionNodeName(node); - auto index_hint_actions_dag = std::make_shared(); + auto index_hint_actions_dag = std::make_unique(); auto & index_hint_actions_dag_outputs = index_hint_actions_dag->getOutputs(); std::unordered_set index_hint_actions_dag_output_node_names; PlannerActionsVisitor actions_visitor(planner_context); diff --git a/src/Planner/PlannerJoinTree.cpp b/src/Planner/PlannerJoinTree.cpp index 43b223172e6..918cfad703e 100644 --- a/src/Planner/PlannerJoinTree.cpp +++ b/src/Planner/PlannerJoinTree.cpp @@ -591,7 +591,7 @@ UInt64 mainQueryNodeBlockSizeByLimit(const SelectQueryInfo & select_query_info) std::unique_ptr createComputeAliasColumnsStep( const std::unordered_map & alias_column_expressions, const DataStream & current_data_stream) { - ActionsDAGPtr merged_alias_columns_actions_dag = std::make_shared(current_data_stream.header.getColumnsWithTypeAndName()); + ActionsDAGPtr merged_alias_columns_actions_dag = std::make_unique(current_data_stream.header.getColumnsWithTypeAndName()); ActionsDAG::NodeRawConstPtrs action_dag_outputs = merged_alias_columns_actions_dag->getInputs(); for (const auto & [column_name, alias_column_actions_dag] : alias_column_expressions) @@ -646,7 +646,7 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres auto table_expression_query_info = select_query_info; table_expression_query_info.table_expression = table_expression; - table_expression_query_info.filter_actions_dag = table_expression_data.getFilterActions(); + table_expression_query_info.filter_actions_dag = table_expression_data.getFilterActions()->clone(); table_expression_query_info.analyzer_can_use_parallel_replicas_on_follower = table_node == planner_context->getGlobalPlannerContext()->parallel_replicas_table; size_t max_streams = settings.max_threads; @@ -776,7 
+776,7 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres if (prewhere_actions) { prewhere_info = std::make_shared(); - prewhere_info->prewhere_actions = prewhere_actions; + prewhere_info->prewhere_actions = prewhere_actions->clone(); prewhere_info->prewhere_column_name = prewhere_actions->getOutputs().at(0)->result_name; prewhere_info->remove_prewhere_column = true; prewhere_info->need_filter = true; @@ -787,7 +787,7 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres const auto & columns_names = table_expression_data.getColumnNames(); std::vector> where_filters; - const auto add_filter = [&](const FilterDAGInfo & filter_info, std::string description) + const auto add_filter = [&](FilterDAGInfo & filter_info, std::string description) { if (!filter_info.actions) return; @@ -805,34 +805,34 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres if (!prewhere_info->prewhere_actions) { - prewhere_info->prewhere_actions = filter_info.actions; + prewhere_info->prewhere_actions = std::move(filter_info.actions); prewhere_info->prewhere_column_name = filter_info.column_name; prewhere_info->remove_prewhere_column = filter_info.do_remove_column; prewhere_info->need_filter = true; } else if (!prewhere_info->row_level_filter) { - prewhere_info->row_level_filter = filter_info.actions; + prewhere_info->row_level_filter = std::move(filter_info.actions); prewhere_info->row_level_column_name = filter_info.column_name; prewhere_info->need_filter = true; } else { - where_filters.emplace_back(filter_info, std::move(description)); + where_filters.emplace_back(std::move(filter_info), std::move(description)); } } else { - where_filters.emplace_back(filter_info, std::move(description)); + where_filters.emplace_back(std::move(filter_info), std::move(description)); } }; auto row_policy_filter_info = buildRowPolicyFilterIfNeeded(storage, table_expression_query_info, planner_context, used_row_policies); - add_filter(row_policy_filter_info, "Row-level security filter"); if (row_policy_filter_info.actions) - table_expression_data.setRowLevelFilterActions(row_policy_filter_info.actions); + table_expression_data.setRowLevelFilterActions(row_policy_filter_info.actions->clone()); + add_filter(row_policy_filter_info, "Row-level security filter"); if (query_context->getParallelReplicasMode() == Context::ParallelReplicasMode::CUSTOM_KEY) { @@ -1063,7 +1063,7 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres if (from_stage == QueryProcessingStage::FetchColumns) { - auto rename_actions_dag = std::make_shared(query_plan.getCurrentDataStream().header.getColumnsWithTypeAndName()); + auto rename_actions_dag = std::make_unique(query_plan.getCurrentDataStream().header.getColumnsWithTypeAndName()); ActionsDAG::NodeRawConstPtrs updated_actions_dag_outputs; for (auto & output_node : rename_actions_dag->getOutputs()) @@ -1077,7 +1077,7 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres rename_actions_dag->getOutputs() = std::move(updated_actions_dag_outputs); - auto rename_step = std::make_unique(query_plan.getCurrentDataStream(), rename_actions_dag); + auto rename_step = std::make_unique(query_plan.getCurrentDataStream(), std::move(rename_actions_dag)); rename_step->setStepDescription("Change column names to column identifiers"); query_plan.addStep(std::move(rename_step)); } @@ -1117,7 +1117,7 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres 
void joinCastPlanColumnsToNullable(QueryPlan & plan_to_add_cast, PlannerContextPtr & planner_context, const FunctionOverloadResolverPtr & to_nullable_function) { - auto cast_actions_dag = std::make_shared(plan_to_add_cast.getCurrentDataStream().header.getColumnsWithTypeAndName()); + auto cast_actions_dag = std::make_unique(plan_to_add_cast.getCurrentDataStream().header.getColumnsWithTypeAndName()); for (auto & output_node : cast_actions_dag->getOutputs()) { @@ -1178,6 +1178,9 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_ join_table_expression, planner_context); + left_join_tree_query_plan.actions_dags.emplace_back(join_clauses_and_actions.left_join_expressions_actions.get()); + left_join_tree_query_plan.actions_dags.emplace_back(join_clauses_and_actions.right_join_expressions_actions.get()); + join_clauses_and_actions.left_join_expressions_actions->appendInputsForUnusedColumns(left_plan.getCurrentDataStream().header); auto left_join_expressions_actions_step = std::make_unique(left_plan.getCurrentDataStream(), join_clauses_and_actions.left_join_expressions_actions); left_join_expressions_actions_step->setStepDescription("JOIN actions"); @@ -1223,7 +1226,7 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_ auto join_cast_plan_output_nodes = [&](QueryPlan & plan_to_add_cast, std::unordered_map & plan_column_name_to_cast_type) { - auto cast_actions_dag = std::make_shared(plan_to_add_cast.getCurrentDataStream().header.getColumnsWithTypeAndName()); + auto cast_actions_dag = std::make_unique(plan_to_add_cast.getCurrentDataStream().header.getColumnsWithTypeAndName()); for (auto & output_node : cast_actions_dag->getOutputs()) { @@ -1381,9 +1384,10 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_ if (join_clauses_and_actions.mixed_join_expressions_actions) { + left_join_tree_query_plan.actions_dags.push_back(join_clauses_and_actions.mixed_join_expressions_actions.get()); ExpressionActionsPtr & mixed_join_expression = table_join->getMixedJoinExpression(); mixed_join_expression = std::make_shared( - join_clauses_and_actions.mixed_join_expressions_actions, + std::move(join_clauses_and_actions.mixed_join_expressions_actions), ExpressionActionsSettings::fromContext(planner_context->getQueryContext())); } } @@ -1537,7 +1541,7 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_ result_plan.unitePlans(std::move(join_step), {std::move(plans)}); } - auto drop_unused_columns_after_join_actions_dag = std::make_shared(result_plan.getCurrentDataStream().header.getColumnsWithTypeAndName()); + auto drop_unused_columns_after_join_actions_dag = std::make_unique(result_plan.getCurrentDataStream().header.getColumnsWithTypeAndName()); ActionsDAG::NodeRawConstPtrs drop_unused_columns_after_join_actions_dag_updated_outputs; std::unordered_set drop_unused_columns_after_join_actions_dag_updated_outputs_names; std::optional first_skipped_column_node_index; @@ -1582,14 +1586,14 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_ left_join_tree_query_plan.used_row_policies.insert(right_join_tree_query_plan_row_policy); /// Collect all required actions dags in `left_join_tree_query_plan.actions_dags` - for (auto && action_dag : right_join_tree_query_plan.actions_dags) + for (const auto * action_dag : right_join_tree_query_plan.actions_dags) left_join_tree_query_plan.actions_dags.emplace_back(action_dag); - if 
(join_clauses_and_actions.left_join_expressions_actions) - left_join_tree_query_plan.actions_dags.emplace_back(std::move(join_clauses_and_actions.left_join_expressions_actions)); - if (join_clauses_and_actions.right_join_expressions_actions) - left_join_tree_query_plan.actions_dags.emplace_back(std::move(join_clauses_and_actions.right_join_expressions_actions)); - if (join_clauses_and_actions.mixed_join_expressions_actions) - left_join_tree_query_plan.actions_dags.push_back(join_clauses_and_actions.mixed_join_expressions_actions); + // if (join_clauses_and_actions.left_join_expressions_actions) + // left_join_tree_query_plan.actions_dags.emplace_back(join_clauses_and_actions.left_join_expressions_actions.get()); + // if (join_clauses_and_actions.right_join_expressions_actions) + // left_join_tree_query_plan.actions_dags.emplace_back(join_clauses_and_actions.right_join_expressions_actions.get()); + // if (join_clauses_and_actions.mixed_join_expressions_actions) + // left_join_tree_query_plan.actions_dags.push_back(join_clauses_and_actions.mixed_join_expressions_actions.get()); auto mapping = std::move(left_join_tree_query_plan.query_node_to_plan_step_mapping); auto & r_mapping = right_join_tree_query_plan.query_node_to_plan_step_mapping; @@ -1619,7 +1623,7 @@ JoinTreeQueryPlan buildQueryPlanForArrayJoinNode(const QueryTreeNodePtr & array_ auto plan = std::move(join_tree_query_plan.query_plan); auto plan_output_columns = plan.getCurrentDataStream().header.getColumnsWithTypeAndName(); - ActionsDAGPtr array_join_action_dag = std::make_shared(plan_output_columns); + ActionsDAGPtr array_join_action_dag = std::make_unique(plan_output_columns); PlannerActionsVisitor actions_visitor(planner_context); std::unordered_set array_join_expressions_output_nodes; @@ -1642,13 +1646,13 @@ JoinTreeQueryPlan buildQueryPlanForArrayJoinNode(const QueryTreeNodePtr & array_ array_join_action_dag->appendInputsForUnusedColumns(plan.getCurrentDataStream().header); - join_tree_query_plan.actions_dags.push_back(array_join_action_dag); + join_tree_query_plan.actions_dags.push_back(array_join_action_dag.get()); - auto array_join_actions = std::make_unique(plan.getCurrentDataStream(), array_join_action_dag); + auto array_join_actions = std::make_unique(plan.getCurrentDataStream(), std::move(array_join_action_dag)); array_join_actions->setStepDescription("ARRAY JOIN actions"); plan.addStep(std::move(array_join_actions)); - auto drop_unused_columns_before_array_join_actions_dag = std::make_shared(plan.getCurrentDataStream().header.getColumnsWithTypeAndName()); + auto drop_unused_columns_before_array_join_actions_dag = std::make_unique(plan.getCurrentDataStream().header.getColumnsWithTypeAndName()); ActionsDAG::NodeRawConstPtrs drop_unused_columns_before_array_join_actions_dag_updated_outputs; std::unordered_set drop_unused_columns_before_array_join_actions_dag_updated_outputs_names; diff --git a/src/Planner/PlannerJoinTree.h b/src/Planner/PlannerJoinTree.h index 9110b2bfef9..675079427eb 100644 --- a/src/Planner/PlannerJoinTree.h +++ b/src/Planner/PlannerJoinTree.h @@ -16,7 +16,7 @@ struct JoinTreeQueryPlan QueryPlan query_plan; QueryProcessingStage::Enum from_stage; std::set used_row_policies{}; - std::vector actions_dags{}; + std::vector actions_dags{}; std::unordered_map query_node_to_plan_step_mapping{}; }; diff --git a/src/Planner/PlannerJoins.cpp b/src/Planner/PlannerJoins.cpp index 84efdd21336..45842c0d705 100644 --- a/src/Planner/PlannerJoins.cpp +++ b/src/Planner/PlannerJoins.cpp @@ -376,8 +376,8 @@ 
JoinClausesAndActions buildJoinClausesAndActions( const JoinNode & join_node, const PlannerContextPtr & planner_context) { - ActionsDAGPtr left_join_actions = std::make_shared(left_table_expression_columns); - ActionsDAGPtr right_join_actions = std::make_shared(right_table_expression_columns); + ActionsDAGPtr left_join_actions = std::make_unique(left_table_expression_columns); + ActionsDAGPtr right_join_actions = std::make_unique(right_table_expression_columns); ColumnsWithTypeAndName mixed_table_expression_columns; for (const auto & left_column : left_table_expression_columns) { @@ -387,7 +387,7 @@ JoinClausesAndActions buildJoinClausesAndActions( { mixed_table_expression_columns.push_back(right_column); } - ActionsDAGPtr mixed_join_actions = std::make_shared(mixed_table_expression_columns); + ActionsDAGPtr mixed_join_actions = std::make_unique(mixed_table_expression_columns); /** It is possible to have constant value in JOIN ON section, that we need to ignore during DAG construction. * If we do not ignore it, this function will be replaced by underlying constant. @@ -601,7 +601,7 @@ JoinClausesAndActions buildJoinClausesAndActions( /// So, for each column, we recalculate the value of the whole expression from JOIN ON to check if rows should be joined. if (result.join_clauses.size() > 1) { - auto mixed_join_expressions_actions = std::make_shared(mixed_table_expression_columns); + auto mixed_join_expressions_actions = std::make_unique(mixed_table_expression_columns); PlannerActionsVisitor join_expression_visitor(planner_context); auto join_expression_dag_node_raw_pointers = join_expression_visitor.visit(*mixed_join_expressions_actions, join_expression); if (join_expression_dag_node_raw_pointers.size() != 1) @@ -611,14 +611,14 @@ JoinClausesAndActions buildJoinClausesAndActions( mixed_join_expressions_actions->addOrReplaceInOutputs(*join_expression_dag_node_raw_pointers[0]); Names required_names{join_expression_dag_node_raw_pointers[0]->result_name}; mixed_join_expressions_actions->removeUnusedActions(required_names); - result.mixed_join_expressions_actions = mixed_join_expressions_actions; + result.mixed_join_expressions_actions = std::move(mixed_join_expressions_actions); } else { const auto & join_clause = result.join_clauses.front(); const auto & mixed_filter_condition_nodes = join_clause.getMixedFilterConditionNodes(); auto mixed_join_expressions_actions = ActionsDAG::buildFilterActionsDAG(mixed_filter_condition_nodes, {}, true); - result.mixed_join_expressions_actions = mixed_join_expressions_actions; + result.mixed_join_expressions_actions = std::move(mixed_join_expressions_actions); } auto outputs = result.mixed_join_expressions_actions->getOutputs(); if (outputs.size() != 1) diff --git a/src/Planner/PlannerWindowFunctions.cpp b/src/Planner/PlannerWindowFunctions.cpp index ce74d82c08d..9deceeef9a3 100644 --- a/src/Planner/PlannerWindowFunctions.cpp +++ b/src/Planner/PlannerWindowFunctions.cpp @@ -120,7 +120,7 @@ std::vector extractWindowDescriptions(const QueryTreeNodes & return result; } -void sortWindowDescriptions(std::vector & window_descriptions) +std::vector sortWindowDescriptions(const std::vector & window_descriptions) { auto window_description_comparator = [](const WindowDescription & lhs, const WindowDescription & rhs) { @@ -151,7 +151,16 @@ void sortWindowDescriptions(std::vector & window_descriptions return left.size() > right.size(); }; - ::sort(window_descriptions.begin(), window_descriptions.end(), window_description_comparator); + auto comparator = [&](size_t lhs, 
size_t rhs) + { + return window_description_comparator(window_descriptions[lhs], window_descriptions[rhs]); + }; + + std::vector perm(window_descriptions.size()); + std::iota(perm.begin(), perm.end(), 0U); + ::sort(perm.begin(), perm.end(), comparator); + + return perm; } } diff --git a/src/Planner/PlannerWindowFunctions.h b/src/Planner/PlannerWindowFunctions.h index 1552ef5a71f..3039ecefc4b 100644 --- a/src/Planner/PlannerWindowFunctions.h +++ b/src/Planner/PlannerWindowFunctions.h @@ -15,6 +15,6 @@ std::vector extractWindowDescriptions(const QueryTreeNodes & /** Try to sort window descriptions in such an order that the window with the longest * sort description goes first, and all window that use its prefixes follow. */ -void sortWindowDescriptions(std::vector & window_descriptions); +std::vector sortWindowDescriptions(const std::vector & window_descriptions); } diff --git a/src/Planner/Utils.cpp b/src/Planner/Utils.cpp index 18a6d297838..493ecf5ef53 100644 --- a/src/Planner/Utils.cpp +++ b/src/Planner/Utils.cpp @@ -440,7 +440,7 @@ FilterDAGInfo buildFilterInfo(QueryTreeNodePtr filter_query_tree, collectSourceColumns(filter_query_tree, planner_context, false /*keep_alias_columns*/); collectSets(filter_query_tree, *planner_context); - auto filter_actions_dag = std::make_shared(); + auto filter_actions_dag = std::make_unique(); PlannerActionsVisitor actions_visitor(planner_context, false /*use_column_identifier_as_action_node_name*/); auto expression_nodes = actions_visitor.visit(*filter_actions_dag, filter_query_tree); diff --git a/src/Processors/QueryPlan/AggregatingStep.cpp b/src/Processors/QueryPlan/AggregatingStep.cpp index 0d7e05af1de..64ba7f7cd2a 100644 --- a/src/Processors/QueryPlan/AggregatingStep.cpp +++ b/src/Processors/QueryPlan/AggregatingStep.cpp @@ -303,7 +303,7 @@ void AggregatingStep::transformPipeline(QueryPipelineBuilder & pipeline, const B const auto & header = ports[set_counter]->getHeader(); /// Here we create a DAG which fills missing keys and adds `__grouping_set` column - auto dag = std::make_shared(header.getColumnsWithTypeAndName()); + auto dag = std::make_unique(header.getColumnsWithTypeAndName()); ActionsDAG::NodeRawConstPtrs outputs; outputs.reserve(output_header.columns() + 1); @@ -347,7 +347,7 @@ void AggregatingStep::transformPipeline(QueryPipelineBuilder & pipeline, const B } dag->getOutputs().swap(outputs); - auto expression = std::make_shared(dag, settings.getActionsSettings()); + auto expression = std::make_shared(std::move(dag), settings.getActionsSettings()); auto transform = std::make_shared(header, expression); connect(*ports[set_counter], transform->getInputPort()); diff --git a/src/Processors/QueryPlan/CubeStep.cpp b/src/Processors/QueryPlan/CubeStep.cpp index d010a3327a6..b6c70061987 100644 --- a/src/Processors/QueryPlan/CubeStep.cpp +++ b/src/Processors/QueryPlan/CubeStep.cpp @@ -36,7 +36,7 @@ CubeStep::CubeStep(const DataStream & input_stream_, Aggregator::Params params_, ProcessorPtr addGroupingSetForTotals(const Block & header, const Names & keys, bool use_nulls, const BuildQueryPipelineSettings & settings, UInt64 grouping_set_number) { - auto dag = std::make_shared(header.getColumnsWithTypeAndName()); + auto dag = std::make_unique(header.getColumnsWithTypeAndName()); auto & outputs = dag->getOutputs(); if (use_nulls) @@ -59,7 +59,7 @@ ProcessorPtr addGroupingSetForTotals(const Block & header, const Names & keys, b grouping_node = &dag->materializeNode(*grouping_node); outputs.insert(outputs.begin(), grouping_node); - auto expression = 
std::make_shared(dag, settings.getActionsSettings()); + auto expression = std::make_shared(std::move(dag), settings.getActionsSettings()); return std::make_shared(header, expression); } diff --git a/src/Processors/QueryPlan/ExpressionStep.cpp b/src/Processors/QueryPlan/ExpressionStep.cpp index 0ccb0c4492a..90ac94a1ace 100644 --- a/src/Processors/QueryPlan/ExpressionStep.cpp +++ b/src/Processors/QueryPlan/ExpressionStep.cpp @@ -30,13 +30,13 @@ ExpressionStep::ExpressionStep(const DataStream & input_stream_, const ActionsDA input_stream_, ExpressionTransform::transformHeader(input_stream_.header, *actions_dag_), getTraits(actions_dag_, input_stream_.header, input_stream_.sort_description)) - , actions_dag(actions_dag_) + , actions_dag(actions_dag_->clone()) { } void ExpressionStep::transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings & settings) { - auto expression = std::make_shared(actions_dag, settings.getActionsSettings()); + auto expression = std::make_shared(actions_dag->clone(), settings.getActionsSettings()); pipeline.addSimpleTransform([&](const Block & header) { @@ -49,7 +49,7 @@ void ExpressionStep::transformPipeline(QueryPipelineBuilder & pipeline, const Bu pipeline.getHeader().getColumnsWithTypeAndName(), output_stream->header.getColumnsWithTypeAndName(), ActionsDAG::MatchColumnsMode::Name); - auto convert_actions = std::make_shared(convert_actions_dag, settings.getActionsSettings()); + auto convert_actions = std::make_shared(std::move(convert_actions_dag), settings.getActionsSettings()); pipeline.addSimpleTransform([&](const Block & header) { @@ -61,13 +61,13 @@ void ExpressionStep::transformPipeline(QueryPipelineBuilder & pipeline, const Bu void ExpressionStep::describeActions(FormatSettings & settings) const { String prefix(settings.offset, settings.indent_char); - auto expression = std::make_shared(actions_dag); + auto expression = std::make_shared(actions_dag->clone()); expression->describeActions(settings.out, prefix); } void ExpressionStep::describeActions(JSONBuilder::JSONMap & map) const { - auto expression = std::make_shared(actions_dag); + auto expression = std::make_shared(actions_dag->clone()); map.add("Expression", expression->toTree()); } @@ -79,7 +79,7 @@ void ExpressionStep::updateOutputStream() if (!getDataStreamTraits().preserves_sorting) return; - FindAliasForInputName alias_finder(actions_dag); + FindAliasForInputName alias_finder(*actions_dag); const auto & input_sort_description = getInputStreams().front().sort_description; for (size_t i = 0, s = input_sort_description.size(); i < s; ++i) { diff --git a/src/Processors/QueryPlan/ExpressionStep.h b/src/Processors/QueryPlan/ExpressionStep.h index 3eef14ac129..ebbac8217cb 100644 --- a/src/Processors/QueryPlan/ExpressionStep.h +++ b/src/Processors/QueryPlan/ExpressionStep.h @@ -5,7 +5,7 @@ namespace DB { class ActionsDAG; -using ActionsDAGPtr = std::shared_ptr; +using ActionsDAGPtr = std::unique_ptr; class ExpressionTransform; class JoiningTransform; diff --git a/src/Processors/QueryPlan/FilterStep.cpp b/src/Processors/QueryPlan/FilterStep.cpp index 72934665b5c..ef9f1d17822 100644 --- a/src/Processors/QueryPlan/FilterStep.cpp +++ b/src/Processors/QueryPlan/FilterStep.cpp @@ -46,7 +46,6 @@ FilterStep::FilterStep( filter_column_name_, remove_filter_column_), getTraits(actions_dag_, input_stream_.header, input_stream_.sort_description, remove_filter_column_, filter_column_name_)) - , actions_dag(actions_dag_) , filter_column_name(std::move(filter_column_name_)) , 
remove_filter_column(remove_filter_column_) { @@ -56,7 +55,7 @@ FilterStep::FilterStep( void FilterStep::transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings & settings) { - auto expression = std::make_shared(actions_dag, settings.getActionsSettings()); + auto expression = std::make_shared(std::move(actions_dag), settings.getActionsSettings()); pipeline.addSimpleTransform([&](const Block & header, QueryPipelineBuilder::StreamType stream_type) { @@ -70,7 +69,7 @@ void FilterStep::transformPipeline(QueryPipelineBuilder & pipeline, const BuildQ pipeline.getHeader().getColumnsWithTypeAndName(), output_stream->header.getColumnsWithTypeAndName(), ActionsDAG::MatchColumnsMode::Name); - auto convert_actions = std::make_shared(convert_actions_dag, settings.getActionsSettings()); + auto convert_actions = std::make_shared(std::move(convert_actions_dag), settings.getActionsSettings()); pipeline.addSimpleTransform([&](const Block & header) { @@ -88,7 +87,7 @@ void FilterStep::describeActions(FormatSettings & settings) const settings.out << " (removed)"; settings.out << '\n'; - auto expression = std::make_shared(actions_dag); + auto expression = std::make_shared(actions_dag->clone()); expression->describeActions(settings.out, prefix); } @@ -97,7 +96,7 @@ void FilterStep::describeActions(JSONBuilder::JSONMap & map) const map.add("Filter Column", filter_column_name); map.add("Removes Filter", remove_filter_column); - auto expression = std::make_shared(actions_dag); + auto expression = std::make_shared(actions_dag->clone()); map.add("Expression", expression->toTree()); } @@ -111,7 +110,7 @@ void FilterStep::updateOutputStream() if (!getDataStreamTraits().preserves_sorting) return; - FindAliasForInputName alias_finder(actions_dag); + FindAliasForInputName alias_finder(*actions_dag); const auto & input_sort_description = getInputStreams().front().sort_description; for (size_t i = 0, s = input_sort_description.size(); i < s; ++i) { diff --git a/src/Processors/QueryPlan/FilterStep.h b/src/Processors/QueryPlan/FilterStep.h index 939d0900c86..0f894a570b7 100644 --- a/src/Processors/QueryPlan/FilterStep.h +++ b/src/Processors/QueryPlan/FilterStep.h @@ -5,7 +5,7 @@ namespace DB { class ActionsDAG; -using ActionsDAGPtr = std::shared_ptr; +using ActionsDAGPtr = std::unique_ptr; /// Implements WHERE, HAVING operations. See FilterTransform. 
class FilterStep : public ITransformingStep diff --git a/src/Processors/QueryPlan/Optimizations/distinctReadInOrder.cpp b/src/Processors/QueryPlan/Optimizations/distinctReadInOrder.cpp index 0a3a4094a66..87e16b5a244 100644 --- a/src/Processors/QueryPlan/Optimizations/distinctReadInOrder.cpp +++ b/src/Processors/QueryPlan/Optimizations/distinctReadInOrder.cpp @@ -10,7 +10,7 @@ namespace DB::QueryPlanOptimizations { /// build actions DAG from stack of steps -static ActionsDAGPtr buildActionsForPlanPath(std::vector & dag_stack) +static ActionsDAGPtr buildActionsForPlanPath(std::vector & dag_stack) { if (dag_stack.empty()) return nullptr; @@ -27,10 +27,10 @@ static ActionsDAGPtr buildActionsForPlanPath(std::vector & dag_st } static std::set -getOriginalDistinctColumns(const ColumnsWithTypeAndName & distinct_columns, std::vector & dag_stack) +getOriginalDistinctColumns(const ColumnsWithTypeAndName & distinct_columns, std::vector & dag_stack) { auto actions = buildActionsForPlanPath(dag_stack); - FindOriginalNodeForOutputName original_node_finder(actions); + FindOriginalNodeForOutputName original_node_finder(*actions); std::set original_distinct_columns; for (const auto & column : distinct_columns) { @@ -65,7 +65,7 @@ size_t tryDistinctReadInOrder(QueryPlan::Node * parent_node) /// (3) gather actions DAG to find original names for columns in distinct step later std::vector steps_to_update; QueryPlan::Node * node = parent_node; - std::vector dag_stack; + std::vector dag_stack; while (!node->children.empty()) { auto * step = dynamic_cast(node->step.get()); @@ -79,9 +79,9 @@ size_t tryDistinctReadInOrder(QueryPlan::Node * parent_node) steps_to_update.push_back(step); if (const auto * const expr = typeid_cast(step); expr) - dag_stack.push_back(expr->getExpression()); + dag_stack.push_back(expr->getExpression().get()); else if (const auto * const filter = typeid_cast(step); filter) - dag_stack.push_back(filter->getExpression()); + dag_stack.push_back(filter->getExpression().get()); node = node->children.front(); } diff --git a/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp b/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp index 263598bdca7..ff1cefff09a 100644 --- a/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp +++ b/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp @@ -611,7 +611,7 @@ size_t tryPushDownFilter(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes if (auto * read_from_merge = typeid_cast(child.get())) { - FilterDAGInfo info{filter->getExpression(), filter->getFilterColumnName(), filter->removesFilterColumn()}; + FilterDAGInfo info{filter->getExpression()->clone(), filter->getFilterColumnName(), filter->removesFilterColumn()}; read_from_merge->addFilter(std::move(info)); std::swap(*parent_node, *child_node); return 1; diff --git a/src/Processors/QueryPlan/Optimizations/optimizePrewhere.cpp b/src/Processors/QueryPlan/Optimizations/optimizePrewhere.cpp index 1badd315200..13b691da888 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizePrewhere.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizePrewhere.cpp @@ -108,7 +108,7 @@ void optimizePrewhere(Stack & stack, QueryPlan::Nodes &) prewhere_info->need_filter = true; prewhere_info->remove_prewhere_column = optimize_result.fully_moved_to_prewhere && filter_step->removesFilterColumn(); - auto filter_expression = filter_step->getExpression(); + auto filter_expression = filter_step->getExpression()->clone(); const auto & filter_column_name = filter_step->getFilterColumnName(); if 
(prewhere_info->remove_prewhere_column) diff --git a/src/Processors/QueryPlan/Optimizations/optimizePrimaryKeyCondition.cpp b/src/Processors/QueryPlan/Optimizations/optimizePrimaryKeyCondition.cpp index dbcaf5f00a7..e57d3319076 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizePrimaryKeyCondition.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizePrimaryKeyCondition.cpp @@ -18,15 +18,15 @@ void optimizePrimaryKeyCondition(const Stack & stack) const auto & storage_prewhere_info = source_step_with_filter->getPrewhereInfo(); if (storage_prewhere_info) { - source_step_with_filter->addFilter(storage_prewhere_info->prewhere_actions, storage_prewhere_info->prewhere_column_name); + source_step_with_filter->addFilter(storage_prewhere_info->prewhere_actions->clone(), storage_prewhere_info->prewhere_column_name); if (storage_prewhere_info->row_level_filter) - source_step_with_filter->addFilter(storage_prewhere_info->row_level_filter, storage_prewhere_info->row_level_column_name); + source_step_with_filter->addFilter(storage_prewhere_info->row_level_filter->clone(), storage_prewhere_info->row_level_column_name); } for (auto iter = stack.rbegin() + 1; iter != stack.rend(); ++iter) { if (auto * filter_step = typeid_cast(iter->node->step.get())) - source_step_with_filter->addFilter(filter_step->getExpression(), filter_step->getFilterColumnName()); + source_step_with_filter->addFilter(filter_step->getExpression()->clone(), filter_step->getFilterColumnName()); /// Note: actually, plan optimizations merge Filter and Expression steps. /// Ideally, chain should look like (Expression -> ...) -> (Filter -> ...) -> ReadFromStorage, diff --git a/src/Processors/QueryPlan/Optimizations/optimizeReadInOrder.cpp b/src/Processors/QueryPlan/Optimizations/optimizeReadInOrder.cpp index 537555afa2a..8e782e68db8 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizeReadInOrder.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizeReadInOrder.cpp @@ -1066,13 +1066,13 @@ size_t tryReuseStorageOrderingForWindowFunctions(QueryPlan::Node * parent_node, for (const auto & actions_dag : window_desc.partition_by_actions) { order_by_elements_actions.emplace_back( - std::make_shared(actions_dag, ExpressionActionsSettings::fromContext(context, CompileExpressions::yes))); + std::make_shared(actions_dag->clone(), ExpressionActionsSettings::fromContext(context, CompileExpressions::yes))); } for (const auto & actions_dag : window_desc.order_by_actions) { order_by_elements_actions.emplace_back( - std::make_shared(actions_dag, ExpressionActionsSettings::fromContext(context, CompileExpressions::yes))); + std::make_shared(actions_dag->clone(), ExpressionActionsSettings::fromContext(context, CompileExpressions::yes))); } auto order_optimizer = std::make_shared( diff --git a/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp b/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp index 70327bc95b4..7c45ef48252 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp @@ -486,7 +486,7 @@ AggregateProjectionCandidates getAggregateProjectionCandidates( auto block = reading.getMergeTreeData().getMinMaxCountProjectionBlock( metadata, candidate.dag->getRequiredColumnsNames(), - (dag.filter_node ? dag.dag : nullptr), + (dag.filter_node ? 
dag.dag.get() : nullptr), parts, max_added_blocks.get(), context); @@ -675,7 +675,7 @@ std::optional optimizeUseAggregateProjections(QueryPlan::Node & node, Qu query_info, context, max_added_blocks, - candidate.dag); + candidate.dag.get()); if (!analyzed) continue; diff --git a/src/Processors/QueryPlan/Optimizations/optimizeUseNormalProjection.cpp b/src/Processors/QueryPlan/Optimizations/optimizeUseNormalProjection.cpp index 0af3869ccf1..c7e96d66817 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizeUseNormalProjection.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizeUseNormalProjection.cpp @@ -172,7 +172,7 @@ std::optional optimizeUseNormalProjections(Stack & stack, QueryPlan::Nod query_info, context, max_added_blocks, - query.filter_node ? query.dag : nullptr); + query.filter_node ? query.dag.get() : nullptr); if (!analyzed) continue; diff --git a/src/Processors/QueryPlan/Optimizations/projectionsCommon.cpp b/src/Processors/QueryPlan/Optimizations/projectionsCommon.cpp index af1578d6af8..d8b40b22904 100644 --- a/src/Processors/QueryPlan/Optimizations/projectionsCommon.cpp +++ b/src/Processors/QueryPlan/Optimizations/projectionsCommon.cpp @@ -213,7 +213,7 @@ bool analyzeProjectionCandidate( const SelectQueryInfo & query_info, const ContextPtr & context, const std::shared_ptr & max_added_blocks, - const ActionsDAGPtr & dag) + const ActionsDAG * dag) { MergeTreeData::DataPartsVector projection_parts; MergeTreeData::DataPartsVector normal_parts; @@ -238,7 +238,7 @@ bool analyzeProjectionCandidate( auto projection_query_info = query_info; projection_query_info.prewhere_info = nullptr; - projection_query_info.filter_actions_dag = dag; + projection_query_info.filter_actions_dag = dag->clone(); auto projection_result_ptr = reader.estimateNumMarksToRead( std::move(projection_parts), diff --git a/src/Processors/QueryPlan/Optimizations/projectionsCommon.h b/src/Processors/QueryPlan/Optimizations/projectionsCommon.h index e1e106b988e..59ad3a43b97 100644 --- a/src/Processors/QueryPlan/Optimizations/projectionsCommon.h +++ b/src/Processors/QueryPlan/Optimizations/projectionsCommon.h @@ -60,6 +60,6 @@ bool analyzeProjectionCandidate( const SelectQueryInfo & query_info, const ContextPtr & context, const std::shared_ptr & max_added_blocks, - const ActionsDAGPtr & dag); + const ActionsDAG * dag); } diff --git a/src/Processors/QueryPlan/Optimizations/removeRedundantDistinct.cpp b/src/Processors/QueryPlan/Optimizations/removeRedundantDistinct.cpp index 51df25b35f4..d3c75c988e7 100644 --- a/src/Processors/QueryPlan/Optimizations/removeRedundantDistinct.cpp +++ b/src/Processors/QueryPlan/Optimizations/removeRedundantDistinct.cpp @@ -65,7 +65,7 @@ namespace } /// build actions DAG from stack of steps - ActionsDAGPtr buildActionsForPlanPath(std::vector & dag_stack) + ActionsDAGPtr buildActionsForPlanPath(std::vector & dag_stack) { if (dag_stack.empty()) return nullptr; @@ -83,7 +83,7 @@ namespace } bool compareAggregationKeysWithDistinctColumns( - const Names & aggregation_keys, const DistinctColumns & distinct_columns, std::vector> actions_chain) + const Names & aggregation_keys, const DistinctColumns & distinct_columns, std::vector> actions_chain) { logDebug("aggregation_keys", aggregation_keys); logDebug("aggregation_keys size", aggregation_keys.size()); @@ -93,7 +93,8 @@ namespace std::set source_columns; for (auto & actions : actions_chain) { - FindOriginalNodeForOutputName original_node_finder(buildActionsForPlanPath(actions)); + auto tmp_actions = buildActionsForPlanPath(actions); + 
FindOriginalNodeForOutputName original_node_finder(*tmp_actions); for (const auto & column : current_columns) { logDebug("distinct column name", column); @@ -152,8 +153,8 @@ namespace const DistinctStep * distinct_step = typeid_cast(distinct_node->step.get()); chassert(distinct_step); - std::vector dag_stack; - std::vector> actions_chain; + std::vector dag_stack; + std::vector> actions_chain; const DistinctStep * inner_distinct_step = nullptr; const IQueryPlanStep * aggregation_before_distinct = nullptr; const QueryPlan::Node * node = distinct_node; @@ -182,9 +183,9 @@ namespace } if (const auto * const expr = typeid_cast(current_step); expr) - dag_stack.push_back(expr->getExpression()); + dag_stack.push_back(expr->getExpression().get()); else if (const auto * const filter = typeid_cast(current_step); filter) - dag_stack.push_back(filter->getExpression()); + dag_stack.push_back(filter->getExpression().get()); node = node->children.front(); if (inner_distinct_step = typeid_cast(node->step.get()); inner_distinct_step) @@ -222,7 +223,7 @@ namespace chassert(distinct_step); const auto distinct_columns = getDistinctColumns(distinct_step); - std::vector dag_stack; + std::vector dag_stack; const DistinctStep * inner_distinct_step = nullptr; const QueryPlan::Node * node = distinct_node; while (!node->children.empty()) @@ -235,9 +236,9 @@ namespace } if (const auto * const expr = typeid_cast(current_step); expr) - dag_stack.push_back(expr->getExpression()); + dag_stack.push_back(expr->getExpression().get()); else if (const auto * const filter = typeid_cast(current_step); filter) - dag_stack.push_back(filter->getExpression()); + dag_stack.push_back(filter->getExpression().get()); node = node->children.front(); inner_distinct_step = typeid_cast(node->step.get()); @@ -267,7 +268,7 @@ namespace logActionsDAG("distinct pass: merged DAG", path_actions); /// compare columns of two DISTINCTs - FindOriginalNodeForOutputName original_node_finder(path_actions); + FindOriginalNodeForOutputName original_node_finder(*path_actions); for (const auto & column : distinct_columns) { const auto * alias_node = original_node_finder.find(String(column)); diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index b2d8aa0e218..e5370c1c130 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -174,7 +174,7 @@ static void updateSortDescriptionForOutputStream( { if (prewhere_info->prewhere_actions) { - FindOriginalNodeForOutputName original_column_finder(prewhere_info->prewhere_actions); + FindOriginalNodeForOutputName original_column_finder(*prewhere_info->prewhere_actions); for (auto & column : original_header) { const auto * original_node = original_column_finder.find(column.name); @@ -185,7 +185,7 @@ static void updateSortDescriptionForOutputStream( if (prewhere_info->row_level_filter) { - FindOriginalNodeForOutputName original_column_finder(prewhere_info->row_level_filter); + FindOriginalNodeForOutputName original_column_finder(*prewhere_info->row_level_filter); for (auto & column : original_header) { const auto * original_node = original_column_finder.find(column.name); @@ -830,10 +830,10 @@ Pipe ReadFromMergeTree::spreadMarkRangesAmongStreams(RangesInDataParts && parts_ pipes[0].getHeader().getColumnsWithTypeAndName(), pipes[1].getHeader().getColumnsWithTypeAndName(), ActionsDAG::MatchColumnsMode::Name); + auto converting_expr = std::make_shared(std::move(conversion_action)); 
pipes[0].addSimpleTransform( - [conversion_action](const Block & header) + [converting_expr](const Block & header) { - auto converting_expr = std::make_shared(conversion_action); return std::make_shared(header, converting_expr); }); return Pipe::unitePipes(std::move(pipes)); @@ -849,7 +849,7 @@ Pipe ReadFromMergeTree::spreadMarkRangesAmongStreams(RangesInDataParts && parts_ static ActionsDAGPtr createProjection(const Block & header) { - return std::make_shared(header.getNamesAndTypesList()); + return std::make_unique(header.getNamesAndTypesList()); } Pipe ReadFromMergeTree::spreadMarkRangesAmongStreamsWithOrder( @@ -1046,7 +1046,7 @@ Pipe ReadFromMergeTree::spreadMarkRangesAmongStreamsWithOrder( for (size_t j = 0; j < prefix_size; ++j) sort_description.emplace_back(sorting_columns[j], input_order_info->direction); - auto sorting_key_expr = std::make_shared(sorting_key_prefix_expr); + auto sorting_key_expr = std::make_shared(std::move(sorting_key_prefix_expr)); auto merge_streams = [&](Pipe & pipe) { @@ -1341,10 +1341,10 @@ Pipe ReadFromMergeTree::spreadMarkRangesAmongStreamsFinal( pipes[0].getHeader().getColumnsWithTypeAndName(), pipes[1].getHeader().getColumnsWithTypeAndName(), ActionsDAG::MatchColumnsMode::Name); + auto converting_expr = std::make_shared(std::move(conversion_action)); pipes[0].addSimpleTransform( - [conversion_action](const Block & header) + [converting_expr](const Block & header) { - auto converting_expr = std::make_shared(conversion_action); return std::make_shared(header, converting_expr); }); return Pipe::unitePipes(std::move(pipes)); @@ -1378,7 +1378,7 @@ ReadFromMergeTree::AnalysisResultPtr ReadFromMergeTree::selectRangesToRead( static void buildIndexes( std::optional & indexes, - ActionsDAGPtr filter_actions_dag, + const ActionsDAG * filter_actions_dag, const MergeTreeData & data, const MergeTreeData::DataPartsVector & parts, const ContextPtr & context, @@ -1518,11 +1518,11 @@ void ReadFromMergeTree::applyFilters(ActionDAGNodes added_filter_nodes) /// (1) SourceStepWithFilter::filter_nodes, (2) query_info.filter_actions_dag. Make sure there are consistent. /// TODO: Get rid of filter_actions_dag in query_info after we move analysis of /// parallel replicas and unused shards into optimization, similar to projection analysis. 
- query_info.filter_actions_dag = filter_actions_dag; + query_info.filter_actions_dag = std::move(filter_actions_dag); buildIndexes( indexes, - filter_actions_dag, + query_info.filter_actions_dag.get(), data, prepared_parts, context, @@ -1564,7 +1564,7 @@ ReadFromMergeTree::AnalysisResultPtr ReadFromMergeTree::selectRangesToRead( const Names & primary_key_column_names = primary_key.column_names; if (!indexes) - buildIndexes(indexes, query_info_.filter_actions_dag, data, parts, context_, query_info_, metadata_snapshot); + buildIndexes(indexes, query_info_.filter_actions_dag.get(), data, parts, context_, query_info_, metadata_snapshot); if (indexes->part_values && indexes->part_values->empty()) return std::make_shared(std::move(result)); @@ -1993,7 +1993,7 @@ void ReadFromMergeTree::initializePipeline(QueryPipelineBuilder & pipeline, cons if (result.sampling.use_sampling) { - auto sampling_actions = std::make_shared(result.sampling.filter_expression); + auto sampling_actions = std::make_shared(result.sampling.filter_expression->clone()); pipe.addSimpleTransform([&](const Block & header) { return std::make_shared( @@ -2031,7 +2031,7 @@ void ReadFromMergeTree::initializePipeline(QueryPipelineBuilder & pipeline, cons if (result_projection) { - auto projection_actions = std::make_shared(result_projection); + auto projection_actions = std::make_shared(result_projection->clone()); pipe.addSimpleTransform([&](const Block & header) { return std::make_shared(header, projection_actions); @@ -2048,7 +2048,7 @@ void ReadFromMergeTree::initializePipeline(QueryPipelineBuilder & pipeline, cons ActionsDAG::MatchColumnsMode::Name, true); - auto converting_dag_expr = std::make_shared(convert_actions_dag); + auto converting_dag_expr = std::make_shared(std::move(convert_actions_dag)); pipe.addSimpleTransform([&](const Block & header) { @@ -2126,7 +2126,7 @@ void ReadFromMergeTree::describeActions(FormatSettings & format_settings) const format_settings.out << " (removed)"; format_settings.out << '\n'; - auto expression = std::make_shared(prewhere_info->prewhere_actions); + auto expression = std::make_shared(prewhere_info->prewhere_actions->clone()); expression->describeActions(format_settings.out, prefix); } @@ -2135,7 +2135,7 @@ void ReadFromMergeTree::describeActions(FormatSettings & format_settings) const format_settings.out << prefix << "Row level filter" << '\n'; format_settings.out << prefix << "Row level filter column: " << prewhere_info->row_level_column_name << '\n'; - auto expression = std::make_shared(prewhere_info->row_level_filter); + auto expression = std::make_shared(prewhere_info->row_level_filter->clone()); expression->describeActions(format_settings.out, prefix); } } @@ -2161,7 +2161,7 @@ void ReadFromMergeTree::describeActions(JSONBuilder::JSONMap & map) const std::unique_ptr prewhere_filter_map = std::make_unique(); prewhere_filter_map->add("Prewhere filter column", prewhere_info->prewhere_column_name); prewhere_filter_map->add("Prewhere filter remove filter column", prewhere_info->remove_prewhere_column); - auto expression = std::make_shared(prewhere_info->prewhere_actions); + auto expression = std::make_shared(prewhere_info->prewhere_actions->clone()); prewhere_filter_map->add("Prewhere filter expression", expression->toTree()); prewhere_info_map->add("Prewhere filter", std::move(prewhere_filter_map)); @@ -2171,7 +2171,7 @@ void ReadFromMergeTree::describeActions(JSONBuilder::JSONMap & map) const { std::unique_ptr row_level_filter_map = std::make_unique(); row_level_filter_map->add("Row 
level filter column", prewhere_info->row_level_column_name); - auto expression = std::make_shared(prewhere_info->row_level_filter); + auto expression = std::make_shared(prewhere_info->row_level_filter->clone()); row_level_filter_map->add("Row level filter expression", expression->toTree()); prewhere_info_map->add("Row level filter", std::move(row_level_filter_map)); diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.h b/src/Processors/QueryPlan/ReadFromMergeTree.h index caa8aa2e1bd..e32507e1f22 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.h +++ b/src/Processors/QueryPlan/ReadFromMergeTree.h @@ -23,7 +23,7 @@ struct MergeTreeDataSelectSamplingData bool read_nothing = false; Float64 used_sample_factor = 1.0; std::shared_ptr filter_function; - ActionsDAGPtr filter_expression; + std::shared_ptr filter_expression; }; struct UsefulSkipIndexes diff --git a/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp b/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp index 11371578c79..b9b239c721b 100644 --- a/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp +++ b/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp @@ -441,7 +441,7 @@ Pipe ReadFromSystemNumbersStep::makePipe() chassert(numbers_storage.step != UInt64{0}); /// Build rpn of query filters - KeyCondition condition(filter_actions_dag, context, column_names, key_expression); + KeyCondition condition(filter_actions_dag.get(), context, column_names, key_expression); if (condition.extractPlainRanges(ranges)) { diff --git a/src/Processors/QueryPlan/SourceStepWithFilter.cpp b/src/Processors/QueryPlan/SourceStepWithFilter.cpp index ad0940b90b9..b91debc8239 100644 --- a/src/Processors/QueryPlan/SourceStepWithFilter.cpp +++ b/src/Processors/QueryPlan/SourceStepWithFilter.cpp @@ -110,7 +110,7 @@ void SourceStepWithFilter::describeActions(FormatSettings & format_settings) con format_settings.out << " (removed)"; format_settings.out << '\n'; - auto expression = std::make_shared(prewhere_info->prewhere_actions); + auto expression = std::make_shared(prewhere_info->prewhere_actions->clone()); expression->describeActions(format_settings.out, prefix); } @@ -119,7 +119,7 @@ void SourceStepWithFilter::describeActions(FormatSettings & format_settings) con format_settings.out << prefix << "Row level filter" << '\n'; format_settings.out << prefix << "Row level filter column: " << prewhere_info->row_level_column_name << '\n'; - auto expression = std::make_shared(prewhere_info->row_level_filter); + auto expression = std::make_shared(prewhere_info->row_level_filter->clone()); expression->describeActions(format_settings.out, prefix); } } @@ -137,7 +137,7 @@ void SourceStepWithFilter::describeActions(JSONBuilder::JSONMap & map) const std::unique_ptr prewhere_filter_map = std::make_unique(); prewhere_filter_map->add("Prewhere filter column", prewhere_info->prewhere_column_name); prewhere_filter_map->add("Prewhere filter remove filter column", prewhere_info->remove_prewhere_column); - auto expression = std::make_shared(prewhere_info->prewhere_actions); + auto expression = std::make_shared(prewhere_info->prewhere_actions->clone()); prewhere_filter_map->add("Prewhere filter expression", expression->toTree()); prewhere_info_map->add("Prewhere filter", std::move(prewhere_filter_map)); @@ -147,7 +147,7 @@ void SourceStepWithFilter::describeActions(JSONBuilder::JSONMap & map) const { std::unique_ptr row_level_filter_map = std::make_unique(); row_level_filter_map->add("Row level filter column", prewhere_info->row_level_column_name); - auto 
expression = std::make_shared(prewhere_info->row_level_filter); + auto expression = std::make_shared(prewhere_info->row_level_filter->clone()); row_level_filter_map->add("Row level filter expression", expression->toTree()); prewhere_info_map->add("Row level filter", std::move(row_level_filter_map)); diff --git a/src/Processors/QueryPlan/SourceStepWithFilter.h b/src/Processors/QueryPlan/SourceStepWithFilter.h index 126d4824fff..8ac0cc24ed1 100644 --- a/src/Processors/QueryPlan/SourceStepWithFilter.h +++ b/src/Processors/QueryPlan/SourceStepWithFilter.h @@ -33,6 +33,7 @@ public: } const ActionsDAGPtr & getFilterActionsDAG() const { return filter_actions_dag; } + ActionsDAGPtr detachFilterActionsDAG() { return std::move(filter_actions_dag); } const SelectQueryInfo & getQueryInfo() const { return query_info; } const PrewhereInfoPtr & getPrewhereInfo() const { return prewhere_info; } @@ -53,7 +54,7 @@ public: void applyFilters() { applyFilters(std::move(filter_nodes)); - filter_dags = {}; + filter_dags.clear(); } virtual void applyFilters(ActionDAGNodes added_filter_nodes); diff --git a/src/Processors/QueryPlan/TotalsHavingStep.cpp b/src/Processors/QueryPlan/TotalsHavingStep.cpp index ac5e144bf4a..45de6c31d24 100644 --- a/src/Processors/QueryPlan/TotalsHavingStep.cpp +++ b/src/Processors/QueryPlan/TotalsHavingStep.cpp @@ -46,7 +46,7 @@ TotalsHavingStep::TotalsHavingStep( getTraits(!filter_column_.empty())) , aggregates(aggregates_) , overflow_row(overflow_row_) - , actions_dag(actions_dag_) + , actions_dag(actions_dag_->clone()) , filter_column_name(filter_column_) , remove_filter(remove_filter_) , totals_mode(totals_mode_) @@ -57,7 +57,7 @@ TotalsHavingStep::TotalsHavingStep( void TotalsHavingStep::transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings & settings) { - auto expression_actions = actions_dag ? std::make_shared(actions_dag, settings.getActionsSettings()) : nullptr; + auto expression_actions = actions_dag ? std::make_shared(actions_dag->clone(), settings.getActionsSettings()) : nullptr; auto totals_having = std::make_shared( pipeline.getHeader(), @@ -100,7 +100,7 @@ void TotalsHavingStep::describeActions(FormatSettings & settings) const if (actions_dag) { bool first = true; - auto expression = std::make_shared(actions_dag); + auto expression = std::make_shared(actions_dag->clone()); for (const auto & action : expression->getActions()) { settings.out << prefix << (first ? 
"Actions: " @@ -117,7 +117,7 @@ void TotalsHavingStep::describeActions(JSONBuilder::JSONMap & map) const if (actions_dag) { map.add("Filter column", filter_column_name); - auto expression = std::make_shared(actions_dag); + auto expression = std::make_shared(actions_dag->clone()); map.add("Expression", expression->toTree()); } } diff --git a/src/Processors/QueryPlan/TotalsHavingStep.h b/src/Processors/QueryPlan/TotalsHavingStep.h index a81bc7bb1a9..52ef5437701 100644 --- a/src/Processors/QueryPlan/TotalsHavingStep.h +++ b/src/Processors/QueryPlan/TotalsHavingStep.h @@ -6,7 +6,7 @@ namespace DB { class ActionsDAG; -using ActionsDAGPtr = std::shared_ptr; +using ActionsDAGPtr = std::unique_ptr; enum class TotalsMode : uint8_t; diff --git a/src/Processors/QueryPlan/WindowStep.h b/src/Processors/QueryPlan/WindowStep.h index 74a0e5930c7..47883e5edf6 100644 --- a/src/Processors/QueryPlan/WindowStep.h +++ b/src/Processors/QueryPlan/WindowStep.h @@ -7,7 +7,7 @@ namespace DB { class ActionsDAG; -using ActionsDAGPtr = std::shared_ptr; +using ActionsDAGPtr = std::unique_ptr; class WindowTransform; diff --git a/src/Processors/SourceWithKeyCondition.h b/src/Processors/SourceWithKeyCondition.h index ee155d6f78c..fcf576637ff 100644 --- a/src/Processors/SourceWithKeyCondition.h +++ b/src/Processors/SourceWithKeyCondition.h @@ -16,13 +16,13 @@ protected: /// Represents pushed down filters in source std::shared_ptr key_condition; - void setKeyConditionImpl(const ActionsDAGPtr & filter_actions_dag, ContextPtr context, const Block & keys) + void setKeyConditionImpl(const ActionsDAG * filter_actions_dag, ContextPtr context, const Block & keys) { key_condition = std::make_shared( filter_actions_dag, context, keys.getNames(), - std::make_shared(std::make_shared(keys.getColumnsWithTypeAndName()))); + std::make_shared(std::make_unique(keys.getColumnsWithTypeAndName()))); } public: diff --git a/src/Processors/Transforms/FillingTransform.cpp b/src/Processors/Transforms/FillingTransform.cpp index bb38c3e1dc5..95267bc24e0 100644 --- a/src/Processors/Transforms/FillingTransform.cpp +++ b/src/Processors/Transforms/FillingTransform.cpp @@ -203,7 +203,7 @@ FillingTransform::FillingTransform( , use_with_fill_by_sorting_prefix(use_with_fill_by_sorting_prefix_) { if (interpolate_description) - interpolate_actions = std::make_shared(interpolate_description->actions); + interpolate_actions = std::make_shared(interpolate_description->actions->clone()); std::vector is_fill_column(header_.columns()); for (size_t i = 0, size = fill_description.size(); i < size; ++i) diff --git a/src/Storages/Hive/StorageHive.cpp b/src/Storages/Hive/StorageHive.cpp index 28d8128e052..2cd51259549 100644 --- a/src/Storages/Hive/StorageHive.cpp +++ b/src/Storages/Hive/StorageHive.cpp @@ -516,7 +516,7 @@ void StorageHive::initMinMaxIndexExpression() partition_names = partition_name_types.getNames(); partition_types = partition_name_types.getTypes(); partition_minmax_idx_expr = std::make_shared( - std::make_shared(partition_name_types), ExpressionActionsSettings::fromContext(getContext())); + std::make_unique(partition_name_types), ExpressionActionsSettings::fromContext(getContext())); } NamesAndTypesList all_name_types = metadata_snapshot->getColumns().getAllPhysical(); @@ -526,7 +526,7 @@ void StorageHive::initMinMaxIndexExpression() hivefile_name_types.push_back(column); } hivefile_minmax_idx_expr = std::make_shared( - std::make_shared(hivefile_name_types), ExpressionActionsSettings::fromContext(getContext())); + std::make_unique(hivefile_name_types), 
ExpressionActionsSettings::fromContext(getContext())); } ASTPtr StorageHive::extractKeyExpressionList(const ASTPtr & node) @@ -647,7 +647,7 @@ HiveFiles StorageHive::collectHiveFilesFromPartition( for (size_t i = 0; i < partition_names.size(); ++i) ranges.emplace_back(fields[i]); - const KeyCondition partition_key_condition(filter_actions_dag, getContext(), partition_names, partition_minmax_idx_expr); + const KeyCondition partition_key_condition(filter_actions_dag.get(), getContext(), partition_names, partition_minmax_idx_expr); if (!partition_key_condition.checkInHyperrectangle(ranges, partition_types).can_be_true) return {}; } @@ -715,7 +715,7 @@ HiveFilePtr StorageHive::getHiveFileIfNeeded( if (prune_level >= PruneLevel::File) { - const KeyCondition hivefile_key_condition(filter_actions_dag, getContext(), hivefile_name_types.getNames(), hivefile_minmax_idx_expr); + const KeyCondition hivefile_key_condition(filter_actions_dag.get(), getContext(), hivefile_name_types.getNames(), hivefile_minmax_idx_expr); if (hive_file->useFileMinMaxIndex()) { /// Load file level minmax index and apply diff --git a/src/Storages/KeyDescription.cpp b/src/Storages/KeyDescription.cpp index 2a697fa5654..e03ecc05064 100644 --- a/src/Storages/KeyDescription.cpp +++ b/src/Storages/KeyDescription.cpp @@ -160,7 +160,7 @@ KeyDescription KeyDescription::buildEmptyKey() { KeyDescription result; result.expression_list_ast = std::make_shared(); - result.expression = std::make_shared(std::make_shared(), ExpressionActionsSettings{}); + result.expression = std::make_shared(std::make_unique(), ExpressionActionsSettings{}); return result; } diff --git a/src/Storages/MergeTree/KeyCondition.cpp b/src/Storages/MergeTree/KeyCondition.cpp index f8cf19120c7..48ec5529af0 100644 --- a/src/Storages/MergeTree/KeyCondition.cpp +++ b/src/Storages/MergeTree/KeyCondition.cpp @@ -691,7 +691,7 @@ static const ActionsDAG::Node & cloneASTWithInversionPushDown( ActionsDAGPtr KeyCondition::cloneASTWithInversionPushDown(ActionsDAG::NodeRawConstPtrs nodes, const ContextPtr & context) { - auto res = std::make_shared(); + auto res = std::make_unique(); std::unordered_map to_inverted; @@ -777,7 +777,7 @@ void KeyCondition::getAllSpaceFillingCurves() } KeyCondition::KeyCondition( - ActionsDAGPtr filter_dag, + const ActionsDAG * filter_dag, ContextPtr context, const Names & key_column_names, const ExpressionActionsPtr & key_expr_, diff --git a/src/Storages/MergeTree/KeyCondition.h b/src/Storages/MergeTree/KeyCondition.h index 2bc3b108e02..14ef74ea113 100644 --- a/src/Storages/MergeTree/KeyCondition.h +++ b/src/Storages/MergeTree/KeyCondition.h @@ -41,7 +41,7 @@ class KeyCondition public: /// Construct key condition from ActionsDAG nodes KeyCondition( - ActionsDAGPtr filter_dag, + const ActionsDAG * filter_dag, ContextPtr context, const Names & key_column_names, const ExpressionActionsPtr & key_expr, diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 2e0ea4cdbcd..7b642c34f37 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -487,7 +487,7 @@ ConditionSelectivityEstimator MergeTreeData::getConditionSelectivityEstimatorByP ASTPtr expression_ast; ConditionSelectivityEstimator result; - PartitionPruner partition_pruner(storage_snapshot->metadata, filter_dag, local_context); + PartitionPruner partition_pruner(storage_snapshot->metadata, filter_dag.get(), local_context); if (partition_pruner.isUseless()) { @@ -746,7 +746,7 @@ ExpressionActionsPtr 
MergeTreeData::getMinMaxExpr(const KeyDescription & partiti if (!partition_key.column_names.empty()) partition_key_columns = partition_key.expression->getRequiredColumnsWithTypes(); - return std::make_shared(std::make_shared(partition_key_columns), settings); + return std::make_shared(std::make_unique(partition_key_columns), settings); } Names MergeTreeData::getMinMaxColumnsNames(const KeyDescription & partition_key) @@ -1152,7 +1152,7 @@ std::optional MergeTreeData::totalRowsByPartitionPredicateImpl( if (!virtual_columns_block.has(input->result_name)) valid = false; - PartitionPruner partition_pruner(metadata_snapshot, filter_dag, local_context, true /* strict */); + PartitionPruner partition_pruner(metadata_snapshot, filter_dag.get(), local_context, true /* strict */); if (partition_pruner.isUseless() && !valid) return {}; @@ -6819,7 +6819,7 @@ using PartitionIdToMaxBlock = std::unordered_map; Block MergeTreeData::getMinMaxCountProjectionBlock( const StorageMetadataPtr & metadata_snapshot, const Names & required_columns, - const ActionsDAGPtr & filter_dag, + const ActionsDAG * filter_dag, const DataPartsVector & parts, const PartitionIdToMaxBlock * max_block_numbers_to_read, ContextPtr query_context) const diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index c6f736a4afd..52916d85fef 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -403,7 +403,7 @@ public: Block getMinMaxCountProjectionBlock( const StorageMetadataPtr & metadata_snapshot, const Names & required_columns, - const ActionsDAGPtr & filter_dag, + const ActionsDAG * filter_dag, const DataPartsVector & parts, const PartitionIdToMaxBlock * max_block_numbers_to_read, ContextPtr query_context) const; diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index 2e287ff3042..61b8b6fdaa8 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -442,7 +442,7 @@ MergeTreeDataSelectSamplingData MergeTreeDataSelectExecutor::getSampling( } void MergeTreeDataSelectExecutor::buildKeyConditionFromPartOffset( - std::optional & part_offset_condition, const ActionsDAGPtr & filter_dag, ContextPtr context) + std::optional & part_offset_condition, const ActionsDAG * filter_dag, ContextPtr context) { if (!filter_dag) return; @@ -463,10 +463,10 @@ void MergeTreeDataSelectExecutor::buildKeyConditionFromPartOffset( return; part_offset_condition.emplace(KeyCondition{ - dag, + dag.get(), context, sample.getNames(), - std::make_shared(std::make_shared(sample.getColumnsWithTypeAndName()), ExpressionActionsSettings{}), + std::make_shared(std::make_unique(sample.getColumnsWithTypeAndName()), ExpressionActionsSettings{}), {}}); } @@ -474,7 +474,7 @@ std::optional> MergeTreeDataSelectExecutor::filterPar const StorageMetadataPtr & metadata_snapshot, const MergeTreeData & data, const MergeTreeData::DataPartsVector & parts, - const ActionsDAGPtr & filter_dag, + const ActionsDAG * filter_dag, ContextPtr context) { if (!filter_dag) diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h index 788355c1e59..39bff5eacd6 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h @@ -161,7 +161,7 @@ public: /// If possible, construct optional key condition from predicates containing _part_offset column. 
static void buildKeyConditionFromPartOffset( - std::optional & part_offset_condition, const ActionsDAGPtr & filter_dag, ContextPtr context); + std::optional & part_offset_condition, const ActionsDAG * filter_dag, ContextPtr context); /// If possible, filter using expression on virtual columns. /// Example: SELECT count() FROM table WHERE _part = 'part_name' @@ -170,7 +170,7 @@ public: const StorageMetadataPtr & metadata_snapshot, const MergeTreeData & data, const MergeTreeData::DataPartsVector & parts, - const ActionsDAGPtr & filter_dag, + const ActionsDAG * filter_dag, ContextPtr context); /// Filter parts using minmax index and partition key. diff --git a/src/Storages/MergeTree/MergeTreeIndexAnnoy.cpp b/src/Storages/MergeTree/MergeTreeIndexAnnoy.cpp index e492ca0aec2..457c85eaa46 100644 --- a/src/Storages/MergeTree/MergeTreeIndexAnnoy.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexAnnoy.cpp @@ -332,7 +332,7 @@ MergeTreeIndexConditionPtr MergeTreeIndexAnnoy::createIndexCondition(const Selec return std::make_shared(index, query, distance_function, context); }; -MergeTreeIndexConditionPtr MergeTreeIndexAnnoy::createIndexCondition(const ActionsDAGPtr &, ContextPtr) const +MergeTreeIndexConditionPtr MergeTreeIndexAnnoy::createIndexCondition(const ActionsDAG *, ContextPtr) const { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "MergeTreeIndexAnnoy cannot be created with ActionsDAG"); } diff --git a/src/Storages/MergeTree/MergeTreeIndexAnnoy.h b/src/Storages/MergeTree/MergeTreeIndexAnnoy.h index d511ab84859..282920c608e 100644 --- a/src/Storages/MergeTree/MergeTreeIndexAnnoy.h +++ b/src/Storages/MergeTree/MergeTreeIndexAnnoy.h @@ -99,7 +99,7 @@ public: MergeTreeIndexGranulePtr createIndexGranule() const override; MergeTreeIndexAggregatorPtr createIndexAggregator(const MergeTreeWriterSettings & settings) const override; MergeTreeIndexConditionPtr createIndexCondition(const SelectQueryInfo & query, ContextPtr context) const; - MergeTreeIndexConditionPtr createIndexCondition(const ActionsDAGPtr &, ContextPtr) const override; + MergeTreeIndexConditionPtr createIndexCondition(const ActionsDAG *, ContextPtr) const override; bool isVectorSearch() const override { return true; } private: diff --git a/src/Storages/MergeTree/MergeTreeIndexBloomFilter.cpp b/src/Storages/MergeTree/MergeTreeIndexBloomFilter.cpp index fc5147bb56c..c6a00751f25 100644 --- a/src/Storages/MergeTree/MergeTreeIndexBloomFilter.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexBloomFilter.cpp @@ -201,7 +201,7 @@ bool maybeTrueOnBloomFilter(const IColumn * hash_column, const BloomFilterPtr & } MergeTreeIndexConditionBloomFilter::MergeTreeIndexConditionBloomFilter( - const ActionsDAGPtr & filter_actions_dag, ContextPtr context_, const Block & header_, size_t hash_functions_) + const ActionsDAG * filter_actions_dag, ContextPtr context_, const Block & header_, size_t hash_functions_) : WithContext(context_), header(header_), hash_functions(hash_functions_) { if (!filter_actions_dag) @@ -897,7 +897,7 @@ MergeTreeIndexAggregatorPtr MergeTreeIndexBloomFilter::createIndexAggregator(con return std::make_shared(bits_per_row, hash_functions, index.column_names); } -MergeTreeIndexConditionPtr MergeTreeIndexBloomFilter::createIndexCondition(const ActionsDAGPtr & filter_actions_dag, ContextPtr context) const +MergeTreeIndexConditionPtr MergeTreeIndexBloomFilter::createIndexCondition(const ActionsDAG * filter_actions_dag, ContextPtr context) const { return std::make_shared(filter_actions_dag, context, index.sample_block, hash_functions); } diff --git 
a/src/Storages/MergeTree/MergeTreeIndexBloomFilter.h b/src/Storages/MergeTree/MergeTreeIndexBloomFilter.h index d66c4b8b6ca..bd1b137176a 100644 --- a/src/Storages/MergeTree/MergeTreeIndexBloomFilter.h +++ b/src/Storages/MergeTree/MergeTreeIndexBloomFilter.h @@ -69,7 +69,7 @@ public: std::vector> predicate; }; - MergeTreeIndexConditionBloomFilter(const ActionsDAGPtr & filter_actions_dag, ContextPtr context_, const Block & header_, size_t hash_functions_); + MergeTreeIndexConditionBloomFilter(const ActionsDAG * filter_actions_dag, ContextPtr context_, const Block & header_, size_t hash_functions_); bool alwaysUnknownOrTrue() const override; @@ -142,7 +142,7 @@ public: MergeTreeIndexAggregatorPtr createIndexAggregator(const MergeTreeWriterSettings & settings) const override; - MergeTreeIndexConditionPtr createIndexCondition(const ActionsDAGPtr & filter_actions_dag, ContextPtr context) const override; + MergeTreeIndexConditionPtr createIndexCondition(const ActionsDAG * filter_actions_dag, ContextPtr context) const override; private: size_t bits_per_row; diff --git a/src/Storages/MergeTree/MergeTreeIndexBloomFilterText.cpp b/src/Storages/MergeTree/MergeTreeIndexBloomFilterText.cpp index 8cf58687125..5b6813d12e3 100644 --- a/src/Storages/MergeTree/MergeTreeIndexBloomFilterText.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexBloomFilterText.cpp @@ -138,7 +138,7 @@ void MergeTreeIndexAggregatorBloomFilterText::update(const Block & block, size_t } MergeTreeConditionBloomFilterText::MergeTreeConditionBloomFilterText( - const ActionsDAGPtr & filter_actions_dag, + const ActionsDAG * filter_actions_dag, ContextPtr context, const Block & index_sample_block, const BloomFilterParameters & params_, @@ -733,7 +733,7 @@ MergeTreeIndexAggregatorPtr MergeTreeIndexBloomFilterText::createIndexAggregator } MergeTreeIndexConditionPtr MergeTreeIndexBloomFilterText::createIndexCondition( - const ActionsDAGPtr & filter_dag, ContextPtr context) const + const ActionsDAG * filter_dag, ContextPtr context) const { return std::make_shared(filter_dag, context, index.sample_block, params, token_extractor.get()); } diff --git a/src/Storages/MergeTree/MergeTreeIndexBloomFilterText.h b/src/Storages/MergeTree/MergeTreeIndexBloomFilterText.h index 6fd969030df..fe042884550 100644 --- a/src/Storages/MergeTree/MergeTreeIndexBloomFilterText.h +++ b/src/Storages/MergeTree/MergeTreeIndexBloomFilterText.h @@ -62,7 +62,7 @@ class MergeTreeConditionBloomFilterText final : public IMergeTreeIndexCondition { public: MergeTreeConditionBloomFilterText( - const ActionsDAGPtr & filter_actions_dag, + const ActionsDAG * filter_actions_dag, ContextPtr context, const Block & index_sample_block, const BloomFilterParameters & params_, @@ -163,7 +163,7 @@ public: MergeTreeIndexAggregatorPtr createIndexAggregator(const MergeTreeWriterSettings & settings) const override; MergeTreeIndexConditionPtr createIndexCondition( - const ActionsDAGPtr & filter_dag, ContextPtr context) const override; + const ActionsDAG * filter_dag, ContextPtr context) const override; BloomFilterParameters params; /// Function for selecting next token. 
diff --git a/src/Storages/MergeTree/MergeTreeIndexFullText.cpp b/src/Storages/MergeTree/MergeTreeIndexFullText.cpp index 47ce24b91eb..cd6af68ebcc 100644 --- a/src/Storages/MergeTree/MergeTreeIndexFullText.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexFullText.cpp @@ -186,7 +186,7 @@ void MergeTreeIndexAggregatorFullText::update(const Block & block, size_t * pos, } MergeTreeConditionFullText::MergeTreeConditionFullText( - const ActionsDAGPtr & filter_actions_dag, + const ActionsDAG * filter_actions_dag, ContextPtr context_, const Block & index_sample_block, const GinFilterParameters & params_, @@ -768,7 +768,7 @@ MergeTreeIndexAggregatorPtr MergeTreeIndexFullText::createIndexAggregatorForPart } MergeTreeIndexConditionPtr MergeTreeIndexFullText::createIndexCondition( - const ActionsDAGPtr & filter_actions_dag, ContextPtr context) const + const ActionsDAG * filter_actions_dag, ContextPtr context) const { return std::make_shared(filter_actions_dag, context, index.sample_block, params, token_extractor.get()); }; diff --git a/src/Storages/MergeTree/MergeTreeIndexFullText.h b/src/Storages/MergeTree/MergeTreeIndexFullText.h index 1a5e848e5ac..8e0b1a22acb 100644 --- a/src/Storages/MergeTree/MergeTreeIndexFullText.h +++ b/src/Storages/MergeTree/MergeTreeIndexFullText.h @@ -63,7 +63,7 @@ class MergeTreeConditionFullText final : public IMergeTreeIndexCondition, WithCo { public: MergeTreeConditionFullText( - const ActionsDAGPtr & filter_actions_dag, + const ActionsDAG * filter_actions_dag, ContextPtr context, const Block & index_sample_block, const GinFilterParameters & params_, @@ -170,7 +170,7 @@ public: MergeTreeIndexGranulePtr createIndexGranule() const override; MergeTreeIndexAggregatorPtr createIndexAggregator(const MergeTreeWriterSettings & settings) const override; MergeTreeIndexAggregatorPtr createIndexAggregatorForPart(const GinIndexStorePtr & store, const MergeTreeWriterSettings & /*settings*/) const override; - MergeTreeIndexConditionPtr createIndexCondition(const ActionsDAGPtr & filter_actions_dag, ContextPtr context) const override; + MergeTreeIndexConditionPtr createIndexCondition(const ActionsDAG * filter_actions_dag, ContextPtr context) const override; GinFilterParameters params; /// Function for selecting next token. 
diff --git a/src/Storages/MergeTree/MergeTreeIndexHypothesis.cpp b/src/Storages/MergeTree/MergeTreeIndexHypothesis.cpp index 0995e2724ec..cd8065ecadf 100644 --- a/src/Storages/MergeTree/MergeTreeIndexHypothesis.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexHypothesis.cpp @@ -79,7 +79,7 @@ MergeTreeIndexAggregatorPtr MergeTreeIndexHypothesis::createIndexAggregator(cons } MergeTreeIndexConditionPtr MergeTreeIndexHypothesis::createIndexCondition( - const ActionsDAGPtr &, ContextPtr) const + const ActionsDAG *, ContextPtr) const { throw Exception(ErrorCodes::LOGICAL_ERROR, "Not supported"); } diff --git a/src/Storages/MergeTree/MergeTreeIndexHypothesis.h b/src/Storages/MergeTree/MergeTreeIndexHypothesis.h index 130e708d76f..e60335fe724 100644 --- a/src/Storages/MergeTree/MergeTreeIndexHypothesis.h +++ b/src/Storages/MergeTree/MergeTreeIndexHypothesis.h @@ -69,7 +69,7 @@ public: MergeTreeIndexAggregatorPtr createIndexAggregator(const MergeTreeWriterSettings & settings) const override; MergeTreeIndexConditionPtr createIndexCondition( - const ActionsDAGPtr & filter_actions_dag, ContextPtr context) const override; + const ActionsDAG * filter_actions_dag, ContextPtr context) const override; MergeTreeIndexMergedConditionPtr createIndexMergedCondition( const SelectQueryInfo & query_info, StorageMetadataPtr storage_metadata) const override; diff --git a/src/Storages/MergeTree/MergeTreeIndexMinMax.cpp b/src/Storages/MergeTree/MergeTreeIndexMinMax.cpp index 20dfed8cf8f..c60d63a59ba 100644 --- a/src/Storages/MergeTree/MergeTreeIndexMinMax.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexMinMax.cpp @@ -157,7 +157,7 @@ void MergeTreeIndexAggregatorMinMax::update(const Block & block, size_t * pos, s namespace { -KeyCondition buildCondition(const IndexDescription & index, const ActionsDAGPtr & filter_actions_dag, ContextPtr context) +KeyCondition buildCondition(const IndexDescription & index, const ActionsDAG * filter_actions_dag, ContextPtr context) { return KeyCondition{filter_actions_dag, context, index.column_names, index.expression}; } @@ -165,7 +165,7 @@ KeyCondition buildCondition(const IndexDescription & index, const ActionsDAGPtr } MergeTreeIndexConditionMinMax::MergeTreeIndexConditionMinMax( - const IndexDescription & index, const ActionsDAGPtr & filter_actions_dag, ContextPtr context) + const IndexDescription & index, const ActionsDAG * filter_actions_dag, ContextPtr context) : index_data_types(index.data_types) , condition(buildCondition(index, filter_actions_dag, context)) { @@ -198,7 +198,7 @@ MergeTreeIndexAggregatorPtr MergeTreeIndexMinMax::createIndexAggregator(const Me } MergeTreeIndexConditionPtr MergeTreeIndexMinMax::createIndexCondition( - const ActionsDAGPtr & filter_actions_dag, ContextPtr context) const + const ActionsDAG * filter_actions_dag, ContextPtr context) const { return std::make_shared(index, filter_actions_dag, context); } diff --git a/src/Storages/MergeTree/MergeTreeIndexMinMax.h b/src/Storages/MergeTree/MergeTreeIndexMinMax.h index dca26fb7b28..c5031ccbb27 100644 --- a/src/Storages/MergeTree/MergeTreeIndexMinMax.h +++ b/src/Storages/MergeTree/MergeTreeIndexMinMax.h @@ -50,7 +50,7 @@ class MergeTreeIndexConditionMinMax final : public IMergeTreeIndexCondition public: MergeTreeIndexConditionMinMax( const IndexDescription & index, - const ActionsDAGPtr & filter_actions_dag, + const ActionsDAG * filter_actions_dag, ContextPtr context); bool alwaysUnknownOrTrue() const override; @@ -77,7 +77,7 @@ public: MergeTreeIndexAggregatorPtr createIndexAggregator(const 
MergeTreeWriterSettings & settings) const override; MergeTreeIndexConditionPtr createIndexCondition( - const ActionsDAGPtr & filter_actions_dag, ContextPtr context) const override; + const ActionsDAG * filter_actions_dag, ContextPtr context) const override; const char* getSerializedFileExtension() const override { return ".idx2"; } MergeTreeIndexFormat getDeserializedFormat(const IDataPartStorage & data_part_storage, const std::string & path_prefix) const override; /// NOLINT diff --git a/src/Storages/MergeTree/MergeTreeIndexSet.cpp b/src/Storages/MergeTree/MergeTreeIndexSet.cpp index b11cbf1e034..7c65381b05b 100644 --- a/src/Storages/MergeTree/MergeTreeIndexSet.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexSet.cpp @@ -245,7 +245,7 @@ MergeTreeIndexConditionSet::MergeTreeIndexConditionSet( const String & index_name_, const Block & index_sample_block, size_t max_rows_, - const ActionsDAGPtr & filter_dag, + const ActionsDAG * filter_dag, ContextPtr context) : index_name(index_name_) , max_rows(max_rows_) @@ -272,9 +272,9 @@ MergeTreeIndexConditionSet::MergeTreeIndexConditionSet( filter_actions_dag->getOutputs()[0] = &traverseDAG(*filter_actions_dag_node, filter_actions_dag, context, node_to_result_node); filter_actions_dag->removeUnusedActions(); - actions = std::make_shared(filter_actions_dag); actions_output_column_name = filter_actions_dag->getOutputs().at(0)->result_name; + actions = std::make_shared(std::move(filter_actions_dag)); } bool MergeTreeIndexConditionSet::alwaysUnknownOrTrue() const @@ -544,7 +544,7 @@ MergeTreeIndexAggregatorPtr MergeTreeIndexSet::createIndexAggregator(const Merge } MergeTreeIndexConditionPtr MergeTreeIndexSet::createIndexCondition( - const ActionsDAGPtr & filter_actions_dag, ContextPtr context) const + const ActionsDAG * filter_actions_dag, ContextPtr context) const { return std::make_shared(index.name, index.sample_block, max_rows, filter_actions_dag, context); } diff --git a/src/Storages/MergeTree/MergeTreeIndexSet.h b/src/Storages/MergeTree/MergeTreeIndexSet.h index 6efc2effafd..abd40b3cf9d 100644 --- a/src/Storages/MergeTree/MergeTreeIndexSet.h +++ b/src/Storages/MergeTree/MergeTreeIndexSet.h @@ -83,7 +83,7 @@ public: const String & index_name_, const Block & index_sample_block, size_t max_rows_, - const ActionsDAGPtr & filter_dag, + const ActionsDAG * filter_dag, ContextPtr context); bool alwaysUnknownOrTrue() const override; @@ -138,7 +138,7 @@ public: MergeTreeIndexAggregatorPtr createIndexAggregator(const MergeTreeWriterSettings & settings) const override; MergeTreeIndexConditionPtr createIndexCondition( - const ActionsDAGPtr & filter_actions_dag, ContextPtr context) const override; + const ActionsDAG * filter_actions_dag, ContextPtr context) const override; size_t max_rows = 0; }; diff --git a/src/Storages/MergeTree/MergeTreeIndexUSearch.cpp b/src/Storages/MergeTree/MergeTreeIndexUSearch.cpp index c9df7210569..59a4b0fbf9c 100644 --- a/src/Storages/MergeTree/MergeTreeIndexUSearch.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexUSearch.cpp @@ -367,7 +367,7 @@ MergeTreeIndexConditionPtr MergeTreeIndexUSearch::createIndexCondition(const Sel return std::make_shared(index, query, distance_function, context); }; -MergeTreeIndexConditionPtr MergeTreeIndexUSearch::createIndexCondition(const ActionsDAGPtr &, ContextPtr) const +MergeTreeIndexConditionPtr MergeTreeIndexUSearch::createIndexCondition(const ActionsDAG *, ContextPtr) const { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "MergeTreeIndexAnnoy cannot be created with ActionsDAG"); } diff --git 
a/src/Storages/MergeTree/MergeTreeIndexUSearch.h b/src/Storages/MergeTree/MergeTreeIndexUSearch.h index 5107cfee371..41de94402c9 100644 --- a/src/Storages/MergeTree/MergeTreeIndexUSearch.h +++ b/src/Storages/MergeTree/MergeTreeIndexUSearch.h @@ -101,7 +101,7 @@ public: MergeTreeIndexGranulePtr createIndexGranule() const override; MergeTreeIndexAggregatorPtr createIndexAggregator(const MergeTreeWriterSettings & settings) const override; MergeTreeIndexConditionPtr createIndexCondition(const SelectQueryInfo & query, ContextPtr context) const; - MergeTreeIndexConditionPtr createIndexCondition(const ActionsDAGPtr &, ContextPtr) const override; + MergeTreeIndexConditionPtr createIndexCondition(const ActionsDAG *, ContextPtr) const override; bool isVectorSearch() const override { return true; } private: diff --git a/src/Storages/MergeTree/MergeTreeIndices.h b/src/Storages/MergeTree/MergeTreeIndices.h index a9f1fa9378f..1be73e1c811 100644 --- a/src/Storages/MergeTree/MergeTreeIndices.h +++ b/src/Storages/MergeTree/MergeTreeIndices.h @@ -167,7 +167,7 @@ struct IMergeTreeIndex } virtual MergeTreeIndexConditionPtr createIndexCondition( - const ActionsDAGPtr & filter_actions_dag, ContextPtr context) const = 0; + const ActionsDAG * filter_actions_dag, ContextPtr context) const = 0; virtual bool isVectorSearch() const { return false; } diff --git a/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp b/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp index 78b67de1a7e..8fa5b2cc955 100644 --- a/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp +++ b/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp @@ -80,7 +80,7 @@ PrewhereExprInfo MergeTreeSelectProcessor::getPrewhereActions(PrewhereInfoPtr pr PrewhereExprStep row_level_filter_step { .type = PrewhereExprStep::Filter, - .actions = std::make_shared(prewhere_info->row_level_filter, actions_settings), + .actions = std::make_shared(prewhere_info->row_level_filter->clone(), actions_settings), .filter_column_name = prewhere_info->row_level_column_name, .remove_filter_column = true, .need_filter = true, @@ -96,7 +96,7 @@ PrewhereExprInfo MergeTreeSelectProcessor::getPrewhereActions(PrewhereInfoPtr pr PrewhereExprStep prewhere_step { .type = PrewhereExprStep::Filter, - .actions = std::make_shared(prewhere_info->prewhere_actions, actions_settings), + .actions = std::make_shared(prewhere_info->prewhere_actions->clone(), actions_settings), .filter_column_name = prewhere_info->prewhere_column_name, .remove_filter_column = prewhere_info->remove_prewhere_column, .need_filter = prewhere_info->need_filter, diff --git a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp index 02f8d6f4f6a..98b35a3ca2c 100644 --- a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp +++ b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp @@ -376,7 +376,7 @@ public: { const auto & primary_key = storage_snapshot->metadata->getPrimaryKey(); const Names & primary_key_column_names = primary_key.column_names; - KeyCondition key_condition(filter, context, primary_key_column_names, primary_key.expression); + KeyCondition key_condition(filter.get(), context, primary_key_column_names, primary_key.expression); LOG_DEBUG(log, "Key condition: {}", key_condition.toString()); if (!key_condition.alwaysFalse()) @@ -437,7 +437,7 @@ void createReadFromPartStep( auto reading = std::make_unique(type, storage, storage_snapshot, std::move(data_part), std::move(columns_to_read), apply_deleted_mask, - filter, std::move(context), log); + 
std::move(filter), std::move(context), log); plan.addStep(std::move(reading)); } diff --git a/src/Storages/MergeTree/MergeTreeSplitPrewhereIntoReadSteps.cpp b/src/Storages/MergeTree/MergeTreeSplitPrewhereIntoReadSteps.cpp index 43e3b0c505a..25596b42951 100644 --- a/src/Storages/MergeTree/MergeTreeSplitPrewhereIntoReadSteps.cpp +++ b/src/Storages/MergeTree/MergeTreeSplitPrewhereIntoReadSteps.cpp @@ -50,7 +50,7 @@ void fillRequiredColumns(const ActionsDAG::Node * node, std::unordered_map; const ActionsDAG::Node & addClonedDAGToDAG( size_t step, const ActionsDAG::Node * original_dag_node, - ActionsDAGPtr new_dag, + const ActionsDAGPtr & new_dag, OriginalToNewNodeMap & node_remap, NodeNameToLastUsedStepMap & node_to_step_map) { @@ -72,7 +72,7 @@ const ActionsDAG::Node & addClonedDAGToDAG( { /// If the node is already in the new DAG, return it const auto & node_ref = node_remap.at(node_name); - if (node_ref.dag == new_dag) + if (node_ref.dag == new_dag.get()) return *node_ref.node; /// If the node is known from the previous steps, add it as an input, except for constants @@ -80,7 +80,7 @@ const ActionsDAG::Node & addClonedDAGToDAG( { node_ref.dag->addOrReplaceInOutputs(*node_ref.node); const auto & new_node = new_dag->addInput(node_ref.node->result_name, node_ref.node->result_type); - node_remap[node_name] = {new_dag, &new_node}; /// TODO: here we update the node reference. Is it always correct? + node_remap[node_name] = {new_dag.get(), &new_node}; /// TODO: here we update the node reference. Is it always correct? /// Remember the index of the last step which reuses this node. /// We cannot remove this node from the outputs before that step. @@ -93,7 +93,7 @@ const ActionsDAG::Node & addClonedDAGToDAG( if (original_dag_node->type == ActionsDAG::ActionType::INPUT) { const auto & new_node = new_dag->addInput(original_dag_node->result_name, original_dag_node->result_type); - node_remap[node_name] = {new_dag, &new_node}; + node_remap[node_name] = {new_dag.get(), &new_node}; return new_node; } @@ -102,7 +102,7 @@ const ActionsDAG::Node & addClonedDAGToDAG( { const auto & new_node = new_dag->addColumn( ColumnWithTypeAndName(original_dag_node->column, original_dag_node->result_type, original_dag_node->result_name)); - node_remap[node_name] = {new_dag, &new_node}; + node_remap[node_name] = {new_dag.get(), &new_node}; return new_node; } @@ -110,7 +110,7 @@ const ActionsDAG::Node & addClonedDAGToDAG( { const auto & alias_child = addClonedDAGToDAG(step, original_dag_node->children[0], new_dag, node_remap, node_to_step_map); const auto & new_node = new_dag->addAlias(alias_child, original_dag_node->result_name); - node_remap[node_name] = {new_dag, &new_node}; + node_remap[node_name] = {new_dag.get(), &new_node}; return new_node; } @@ -125,7 +125,7 @@ const ActionsDAG::Node & addClonedDAGToDAG( } const auto & new_node = new_dag->addFunction(original_dag_node->function_base, new_children, original_dag_node->result_name); - node_remap[node_name] = {new_dag, &new_node}; + node_remap[node_name] = {new_dag.get(), &new_node}; return new_node; } @@ -133,13 +133,13 @@ const ActionsDAG::Node & addClonedDAGToDAG( } const ActionsDAG::Node & addFunction( - ActionsDAGPtr new_dag, + const ActionsDAGPtr & new_dag, const FunctionOverloadResolverPtr & function, ActionsDAG::NodeRawConstPtrs children, OriginalToNewNodeMap & node_remap) { const auto & new_node = new_dag->addFunction(function, children, ""); - node_remap[new_node.result_name] = {new_dag, &new_node}; + node_remap[new_node.result_name] = {new_dag.get(), 
&new_node}; return new_node; } @@ -147,7 +147,7 @@ const ActionsDAG::Node & addFunction( /// This is different from ActionsDAG::addCast() because it set the name equal to the original name effectively hiding the value before cast, /// but it might be required for further steps with its original uncasted type. const ActionsDAG::Node & addCast( - ActionsDAGPtr dag, + const ActionsDAGPtr & dag, const ActionsDAG::Node & node_to_cast, const String & type_name, OriginalToNewNodeMap & node_remap) @@ -173,7 +173,7 @@ const ActionsDAG::Node & addCast( /// 1. produces a result with the proper Nullable or non-Nullable UInt8 type and /// 2. makes sure that the result contains only 0 or 1 values even if the source column contains non-boolean values. const ActionsDAG::Node & addAndTrue( - ActionsDAGPtr dag, + const ActionsDAGPtr & dag, const ActionsDAG::Node & filter_node_to_normalize, OriginalToNewNodeMap & node_remap) { @@ -258,7 +258,7 @@ bool tryBuildPrewhereSteps(PrewhereInfoPtr prewhere_info, const ExpressionAction for (size_t step_index = 0; step_index < condition_groups.size(); ++step_index) { const auto & condition_group = condition_groups[step_index]; - ActionsDAGPtr step_dag = std::make_shared(); + ActionsDAGPtr step_dag = std::make_unique(); String result_name; std::vector new_condition_nodes; @@ -299,7 +299,7 @@ bool tryBuildPrewhereSteps(PrewhereInfoPtr prewhere_info, const ExpressionAction } } - steps.push_back({step_dag, result_name}); + steps.push_back({std::move(step_dag), result_name}); } /// 6. Find all outputs of the original DAG @@ -345,11 +345,11 @@ bool tryBuildPrewhereSteps(PrewhereInfoPtr prewhere_info, const ExpressionAction { for (size_t step_index = 0; step_index < steps.size(); ++step_index) { - const auto & step = steps[step_index]; + auto & step = steps[step_index]; PrewhereExprStep new_step { .type = PrewhereExprStep::Filter, - .actions = std::make_shared(step.actions, actions_settings), + .actions = std::make_shared(std::move(step.actions), actions_settings), .filter_column_name = step.column_name, /// Don't remove if it's in the list of original outputs .remove_filter_column = diff --git a/src/Storages/MergeTree/PartitionPruner.cpp b/src/Storages/MergeTree/PartitionPruner.cpp index 9de7b238f57..6df7b5aa054 100644 --- a/src/Storages/MergeTree/PartitionPruner.cpp +++ b/src/Storages/MergeTree/PartitionPruner.cpp @@ -4,7 +4,7 @@ namespace DB { -PartitionPruner::PartitionPruner(const StorageMetadataPtr & metadata, ActionsDAGPtr filter_actions_dag, ContextPtr context, bool strict) +PartitionPruner::PartitionPruner(const StorageMetadataPtr & metadata, const ActionsDAG * filter_actions_dag, ContextPtr context, bool strict) : partition_key(MergeTreePartition::adjustPartitionKey(metadata, context)) , partition_condition(filter_actions_dag, context, partition_key.column_names, partition_key.expression, true /* single_point */) , useless((strict && partition_condition.isRelaxed()) || partition_condition.alwaysUnknownOrTrue()) diff --git a/src/Storages/MergeTree/PartitionPruner.h b/src/Storages/MergeTree/PartitionPruner.h index ca24559ca01..d89dfb7b245 100644 --- a/src/Storages/MergeTree/PartitionPruner.h +++ b/src/Storages/MergeTree/PartitionPruner.h @@ -13,7 +13,7 @@ namespace DB class PartitionPruner { public: - PartitionPruner(const StorageMetadataPtr & metadata, ActionsDAGPtr filter_actions_dag, ContextPtr context, bool strict = false); + PartitionPruner(const StorageMetadataPtr & metadata, const ActionsDAG * filter_actions_dag, ContextPtr context, bool strict = false); bool 
canBePruned(const IMergeTreeDataPart & part) const; diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp index 2fc6993369d..f640fb9ba0a 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp @@ -79,7 +79,7 @@ StorageObjectStorageSource::~StorageObjectStorageSource() void StorageObjectStorageSource::setKeyCondition(const ActionsDAGPtr & filter_actions_dag, ContextPtr context_) { - setKeyConditionImpl(filter_actions_dag, context_, read_from_format_info.format_header); + setKeyConditionImpl(filter_actions_dag.get(), context_, read_from_format_info.format_header); } std::string StorageObjectStorageSource::getUniqueStoragePathIdentifier( diff --git a/src/Storages/SelectQueryInfo.h b/src/Storages/SelectQueryInfo.h index 52b6674c93d..654b8b788fe 100644 --- a/src/Storages/SelectQueryInfo.h +++ b/src/Storages/SelectQueryInfo.h @@ -19,7 +19,7 @@ class ExpressionActions; using ExpressionActionsPtr = std::shared_ptr; class ActionsDAG; -using ActionsDAGPtr = std::shared_ptr; +using ActionsDAGPtr = std::unique_ptr; struct PrewhereInfo; using PrewhereInfoPtr = std::shared_ptr; @@ -192,7 +192,7 @@ struct SelectQueryInfo ASTPtr parallel_replica_custom_key_ast; /// Filter actions dag for current storage - ActionsDAGPtr filter_actions_dag; + std::shared_ptr filter_actions_dag; ReadInOrderOptimizerPtr order_optimizer; /// Can be modified while reading from storage diff --git a/src/Storages/StorageBuffer.cpp b/src/Storages/StorageBuffer.cpp index a3f6b6afc5d..9bddf4f0230 100644 --- a/src/Storages/StorageBuffer.cpp +++ b/src/Storages/StorageBuffer.cpp @@ -432,7 +432,7 @@ void StorageBuffer::read( { return std::make_shared( header, - std::make_shared(query_info.prewhere_info->row_level_filter, actions_settings), + std::make_shared(query_info.prewhere_info->row_level_filter->clone(), actions_settings), query_info.prewhere_info->row_level_column_name, false); }); @@ -442,7 +442,7 @@ void StorageBuffer::read( { return std::make_shared( header, - std::make_shared(query_info.prewhere_info->prewhere_actions, actions_settings), + std::make_shared(query_info.prewhere_info->prewhere_actions->clone(), actions_settings), query_info.prewhere_info->prewhere_column_name, query_info.prewhere_info->remove_prewhere_column); }); diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index 5048ef4788e..be421e8e2bc 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -1120,7 +1120,7 @@ static ActionsDAGPtr getFilterFromQuery(const ASTPtr & ast, ContextPtr context) if (!source) return nullptr; - return source->getFilterActionsDAG(); + return source->detachFilterActionsDAG(); } diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index aaf84f6f82c..702c257bfb6 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -1235,7 +1235,7 @@ StorageFileSource::~StorageFileSource() void StorageFileSource::setKeyCondition(const ActionsDAGPtr & filter_actions_dag, ContextPtr context_) { - setKeyConditionImpl(filter_actions_dag, context_, block_for_format); + setKeyConditionImpl(filter_actions_dag.get(), context_, block_for_format); } diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index e2f92f08d7a..c42e3058347 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -1235,7 +1235,7 @@ 
ReadFromMerge::RowPolicyData::RowPolicyData(RowPolicyFilterPtr row_policy_filter auto expression_analyzer = ExpressionAnalyzer{expr, syntax_result, local_context}; actions_dag = expression_analyzer.getActionsDAG(false /* add_aliases */, false /* project_result */); - filter_actions = std::make_shared(actions_dag, + filter_actions = std::make_shared(std::move(actions_dag), ExpressionActionsSettings::fromContext(local_context, CompileExpressions::yes)); const auto & required_columns = filter_actions->getRequiredColumnsWithTypes(); const auto & sample_block_columns = filter_actions->getSampleBlock().getNamesAndTypesList(); @@ -1273,12 +1273,12 @@ void ReadFromMerge::RowPolicyData::extendNames(Names & names) const void ReadFromMerge::RowPolicyData::addStorageFilter(SourceStepWithFilter * step) const { - step->addFilter(actions_dag, filter_column_name); + step->addFilter(actions_dag->clone(), filter_column_name); } void ReadFromMerge::RowPolicyData::addFilterTransform(QueryPlan & plan) const { - auto filter_step = std::make_unique(plan.getCurrentDataStream(), actions_dag, filter_column_name, true /* remove filter column */); + auto filter_step = std::make_unique(plan.getCurrentDataStream(), actions_dag->clone(), filter_column_name, true /* remove filter column */); plan.addStep(std::move(filter_step)); } @@ -1471,7 +1471,7 @@ void ReadFromMerge::convertAndFilterSourceStream( { pipe_columns.emplace_back(NameAndTypePair(alias.name, alias.type)); - auto actions_dag = std::make_shared(pipe_columns); + auto actions_dag = std::make_unique(pipe_columns); QueryTreeNodePtr query_tree = buildQueryTree(alias.expression, local_context); query_tree->setAlias(alias.name); @@ -1486,7 +1486,7 @@ void ReadFromMerge::convertAndFilterSourceStream( throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected to have 1 output but got {}", nodes.size()); actions_dag->addOrReplaceInOutputs(actions_dag->addAlias(*nodes.front(), alias.name)); - auto expression_step = std::make_unique(child.plan.getCurrentDataStream(), actions_dag); + auto expression_step = std::make_unique(child.plan.getCurrentDataStream(), std::move(actions_dag)); child.plan.addStep(std::move(expression_step)); } } diff --git a/src/Storages/StorageURL.h b/src/Storages/StorageURL.h index f550ccb2bc4..c336f597f41 100644 --- a/src/Storages/StorageURL.h +++ b/src/Storages/StorageURL.h @@ -187,7 +187,7 @@ public: void setKeyCondition(const ActionsDAGPtr & filter_actions_dag, ContextPtr context_) override { - setKeyConditionImpl(filter_actions_dag, context_, block_for_format); + setKeyConditionImpl(filter_actions_dag.get(), context_, block_for_format); } Chunk generate() override; diff --git a/src/Storages/StorageValues.cpp b/src/Storages/StorageValues.cpp index 894b1404a21..4d73f8e5c87 100644 --- a/src/Storages/StorageValues.cpp +++ b/src/Storages/StorageValues.cpp @@ -48,14 +48,14 @@ Pipe StorageValues::read( if (!prepared_pipe.empty()) { - auto dag = std::make_shared(prepared_pipe.getHeader().getColumnsWithTypeAndName()); + auto dag = std::make_unique(prepared_pipe.getHeader().getColumnsWithTypeAndName()); ActionsDAG::NodeRawConstPtrs outputs; outputs.reserve(column_names.size()); for (const auto & name : column_names) outputs.push_back(dag->getOutputs()[prepared_pipe.getHeader().getPositionByName(name)]); dag->getOutputs().swap(outputs); - auto expression = std::make_shared(dag); + auto expression = std::make_shared(std::move(dag)); prepared_pipe.addSimpleTransform([&](const Block & header) { diff --git a/src/Storages/StorageView.cpp 
b/src/Storages/StorageView.cpp index 016de94c17c..2c0d5c5ca85 100644 --- a/src/Storages/StorageView.cpp +++ b/src/Storages/StorageView.cpp @@ -177,7 +177,7 @@ void StorageView::read( /// It's expected that the columns read from storage are not constant. /// Because method 'getSampleBlockForColumns' is used to obtain a structure of result in InterpreterSelectQuery. - auto materializing_actions = std::make_shared(query_plan.getCurrentDataStream().header.getColumnsWithTypeAndName()); + auto materializing_actions = std::make_unique(query_plan.getCurrentDataStream().header.getColumnsWithTypeAndName()); materializing_actions->addMaterializingOutputActions(); auto materializing = std::make_unique(query_plan.getCurrentDataStream(), std::move(materializing_actions)); diff --git a/src/Storages/TTLDescription.cpp b/src/Storages/TTLDescription.cpp index f831465277d..56f65b57367 100644 --- a/src/Storages/TTLDescription.cpp +++ b/src/Storages/TTLDescription.cpp @@ -183,7 +183,7 @@ static ExpressionAndSets buildExpressionAndSets(ASTPtr & ast, const NamesAndType dag->getOutputs() = {col}; dag->removeUnusedActions(); - result.expression = std::make_shared(dag, ExpressionActionsSettings::fromContext(context_copy)); + result.expression = std::make_shared(std::move(dag), ExpressionActionsSettings::fromContext(context_copy)); result.sets = analyzer.getPreparedSets(); return result; diff --git a/src/Storages/VirtualColumnUtils.cpp b/src/Storages/VirtualColumnUtils.cpp index 778c9e13adb..6f7d1d4c39f 100644 --- a/src/Storages/VirtualColumnUtils.cpp +++ b/src/Storages/VirtualColumnUtils.cpp @@ -77,10 +77,10 @@ void buildSetsForDAG(const ActionsDAGPtr & dag, const ContextPtr & context) } } -void filterBlockWithDAG(ActionsDAGPtr dag, Block & block, ContextPtr context) +void filterBlockWithDAG(const ActionsDAGPtr & dag, Block & block, ContextPtr context) { buildSetsForDAG(dag, context); - auto actions = std::make_shared(dag); + auto actions = std::make_shared(dag->clone()); Block block_with_filter = block; actions->execute(block_with_filter, /*dry_run=*/ false, /*allow_duplicates_in_input=*/ true); diff --git a/src/Storages/VirtualColumnUtils.h b/src/Storages/VirtualColumnUtils.h index fbfbdd6c6cc..0cf8470bc60 100644 --- a/src/Storages/VirtualColumnUtils.h +++ b/src/Storages/VirtualColumnUtils.h @@ -23,7 +23,7 @@ namespace VirtualColumnUtils void filterBlockWithPredicate(const ActionsDAG::Node * predicate, Block & block, ContextPtr context); /// Just filters block. Block should contain all the required columns. -void filterBlockWithDAG(ActionsDAGPtr dag, Block & block, ContextPtr context); +void filterBlockWithDAG(const ActionsDAGPtr & dag, Block & block, ContextPtr context); /// Builds sets used by ActionsDAG inplace. 
void buildSetsForDAG(const ActionsDAGPtr & dag, const ContextPtr & context); diff --git a/src/Storages/WindowView/StorageWindowView.cpp b/src/Storages/WindowView/StorageWindowView.cpp index 8bca1c97aad..da4e751a88a 100644 --- a/src/Storages/WindowView/StorageWindowView.cpp +++ b/src/Storages/WindowView/StorageWindowView.cpp @@ -567,7 +567,7 @@ std::pair StorageWindowView::getNewBlocks(UInt32 watermark) builder.addSimpleTransform([&](const Block & header) { return std::make_shared( - header, std::make_shared(filter_expression), filter_function->getColumnName(), true); + header, std::make_shared(std::move(filter_expression)), filter_function->getColumnName(), true); }); /// Adding window column @@ -592,7 +592,7 @@ std::pair StorageWindowView::getNewBlocks(UInt32 watermark) new_header.getColumnsWithTypeAndName(), ActionsDAG::MatchColumnsMode::Name); auto actions = std::make_shared( - convert_actions_dag, ExpressionActionsSettings::fromContext(getContext(), CompileExpressions::yes)); + std::move(convert_actions_dag), ExpressionActionsSettings::fromContext(getContext(), CompileExpressions::yes)); builder.addSimpleTransform([&](const Block & stream_header) { return std::make_shared(stream_header, actions); @@ -700,7 +700,7 @@ inline void StorageWindowView::fire(UInt32 watermark) getTargetTable()->getInMemoryMetadataPtr()->getColumns(), getContext(), getContext()->getSettingsRef().insert_null_as_default); - auto adding_missing_defaults_actions = std::make_shared(adding_missing_defaults_dag); + auto adding_missing_defaults_actions = std::make_shared(std::move(adding_missing_defaults_dag)); pipe.addSimpleTransform([&](const Block & stream_header) { return std::make_shared(stream_header, adding_missing_defaults_actions); @@ -711,7 +711,7 @@ inline void StorageWindowView::fire(UInt32 watermark) block_io.pipeline.getHeader().getColumnsWithTypeAndName(), ActionsDAG::MatchColumnsMode::Position); auto actions = std::make_shared( - convert_actions_dag, + std::move(convert_actions_dag), ExpressionActionsSettings::fromContext(getContext(), CompileExpressions::yes)); pipe.addSimpleTransform([&](const Block & stream_header) { @@ -1475,7 +1475,7 @@ void StorageWindowView::writeIntoWindowView( pipe.addSimpleTransform([&](const Block & header) { return std::make_shared( - header, std::make_shared(filter_expression), + header, std::make_shared(std::move(filter_expression)), filter_function->getColumnName(), true); }); } @@ -1583,7 +1583,7 @@ void StorageWindowView::writeIntoWindowView( output->getHeader().getColumnsWithTypeAndName(), ActionsDAG::MatchColumnsMode::Name); auto convert_actions = std::make_shared( - convert_actions_dag, ExpressionActionsSettings::fromContext(local_context, CompileExpressions::yes)); + std::move(convert_actions_dag), ExpressionActionsSettings::fromContext(local_context, CompileExpressions::yes)); builder.addSimpleTransform([&](const Block & header) { return std::make_shared(header, convert_actions); }); } From e348186ba26072eb76e549cd2a0adcd801c92bc6 Mon Sep 17 00:00:00 2001 From: jsc0218 Date: Tue, 18 Jun 2024 21:30:37 +0000 Subject: [PATCH 005/661] clear hint in table engine and sources --- src/Access/Common/AccessRightsElement.cpp | 6 +- src/Access/Common/AccessRightsElement.h | 1 + src/Access/ContextAccess.cpp | 80 ++++++++++++++++++----- 3 files changed, 70 insertions(+), 17 deletions(-) diff --git a/src/Access/Common/AccessRightsElement.cpp b/src/Access/Common/AccessRightsElement.cpp index 24ff4e7631b..2ee13d6b94f 100644 --- a/src/Access/Common/AccessRightsElement.cpp +++ 
b/src/Access/Common/AccessRightsElement.cpp @@ -224,7 +224,11 @@ void AccessRightsElement::replaceEmptyDatabase(const String & current_database) String AccessRightsElement::toString() const { return toStringImpl(*this, true); } String AccessRightsElement::toStringWithoutOptions() const { return toStringImpl(*this, false); } - +String AccessRightsElement::toStringWithoutONClause() const +{ + String result{access_flags.toKeywords().front()}; + return result + " ON {db.table}"; +} bool AccessRightsElements::empty() const { return std::all_of(begin(), end(), [](const AccessRightsElement & e) { return e.empty(); }); } diff --git a/src/Access/Common/AccessRightsElement.h b/src/Access/Common/AccessRightsElement.h index ba625fc43df..49764fc727f 100644 --- a/src/Access/Common/AccessRightsElement.h +++ b/src/Access/Common/AccessRightsElement.h @@ -89,6 +89,7 @@ struct AccessRightsElement /// Returns a human-readable representation like "GRANT SELECT, UPDATE(x, y) ON db.table". String toString() const; String toStringWithoutOptions() const; + String toStringWithoutONClause() const; }; diff --git a/src/Access/ContextAccess.cpp b/src/Access/ContextAccess.cpp index 2a658d7aaa2..3ce30a0b681 100644 --- a/src/Access/ContextAccess.cpp +++ b/src/Access/ContextAccess.cpp @@ -37,6 +37,24 @@ namespace ErrorCodes namespace { + static const std::vector> source_and_table_engines = { + {AccessType::FILE, "File"}, + {AccessType::URL, "URL"}, + {AccessType::REMOTE, "Distributed"}, + {AccessType::MONGO, "MongoDB"}, + {AccessType::REDIS, "Redis"}, + {AccessType::MYSQL, "MySQL"}, + {AccessType::POSTGRES, "PostgreSQL"}, + {AccessType::SQLITE, "SQLite"}, + {AccessType::ODBC, "ODBC"}, + {AccessType::JDBC, "JDBC"}, + {AccessType::HDFS, "HDFS"}, + {AccessType::S3, "S3"}, + {AccessType::HIVE, "Hive"}, + {AccessType::AZURE, "AzureBlobStorage"} + }; + + AccessRights mixAccessRightsFromUserAndRoles(const User & user, const EnabledRolesInfo & roles_info) { AccessRights res = user.access; @@ -205,22 +223,6 @@ namespace } /// There is overlap between AccessType sources and table engines, so the following code avoids user granting twice. - static const std::vector> source_and_table_engines = { - {AccessType::FILE, "File"}, - {AccessType::URL, "URL"}, - {AccessType::REMOTE, "Distributed"}, - {AccessType::MONGO, "MongoDB"}, - {AccessType::REDIS, "Redis"}, - {AccessType::MYSQL, "MySQL"}, - {AccessType::POSTGRES, "PostgreSQL"}, - {AccessType::SQLITE, "SQLite"}, - {AccessType::ODBC, "ODBC"}, - {AccessType::JDBC, "JDBC"}, - {AccessType::HDFS, "HDFS"}, - {AccessType::S3, "S3"}, - {AccessType::HIVE, "Hive"}, - {AccessType::AZURE, "AzureBlobStorage"} - }; /// Sync SOURCE and TABLE_ENGINE, so only need to check TABLE_ENGINE later. if (access_control.doesTableEnginesRequireGrant()) @@ -555,6 +557,18 @@ std::shared_ptr ContextAccess::getAccessRightsWithImplicit() return nothing_granted; } +/// Just Dummy to pass compile. +template +static std::string_view getTableEngineName(const Args &... args[[maybe_unused]]) +{ + return ""; +} + +template +static std::string_view getTableEngineName(std::string_view name, const Args &... args[[maybe_unused]]) +{ + return name; +} template bool ContextAccess::checkAccessImplHelper(AccessFlags flags, const Args &... args) const @@ -611,6 +625,40 @@ bool ContextAccess::checkAccessImplHelper(AccessFlags flags, const Args &... arg if (!granted) { + /// As we check the SOURCES from the Table Engine logic, direct prompt about Table Engine would be misleading since + /// SOURCES is not granted actually. 
In order to solve this, turn the prompt logic back to Sources. + if (flags & AccessType::TABLE_ENGINE && !access_control->doesTableEnginesRequireGrant()) + { + AccessFlags newFlags; + + String table_engine_name{getTableEngineName(args...)}; + for (const auto & source_and_table_engine : source_and_table_engines) + { + const auto & table_engine = std::get<1>(source_and_table_engine); + if (table_engine != table_engine_name) continue; + const auto & source = std::get<0>(source_and_table_engine); + /// Set the flags from Table Engine to SOURCES so that prompts can be meaningful. + newFlags = source; + break; + } + + if (newFlags == AccessType::NONE) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Didn't find the target Source from the Table Engine"); + + if (grant_option && acs->isGranted(flags, args...)) + { + return access_denied(ErrorCodes::ACCESS_DENIED, + "{}: Not enough privileges. " + "The required privileges have been granted, but without grant option. " + "To execute this query, it's necessary to have the grant {} WITH GRANT OPTION", + AccessRightsElement{newFlags}.toStringWithoutONClause()); + } + + return access_denied(ErrorCodes::ACCESS_DENIED, + "{}: Not enough privileges. To execute this query, it's necessary to have the grant {}", + AccessRightsElement{newFlags}.toStringWithoutONClause() + (grant_option ? " WITH GRANT OPTION" : "")); + } + if (grant_option && acs->isGranted(flags, args...)) { return access_denied(ErrorCodes::ACCESS_DENIED, From f7e81e1ae2752020c076990395349ccd2d69cf2b Mon Sep 17 00:00:00 2001 From: jsc0218 Date: Tue, 18 Jun 2024 21:59:07 +0000 Subject: [PATCH 006/661] fix --- src/Access/ContextAccess.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Access/ContextAccess.cpp b/src/Access/ContextAccess.cpp index 3ce30a0b681..de0e7e3d777 100644 --- a/src/Access/ContextAccess.cpp +++ b/src/Access/ContextAccess.cpp @@ -625,8 +625,8 @@ bool ContextAccess::checkAccessImplHelper(AccessFlags flags, const Args &... arg if (!granted) { - /// As we check the SOURCES from the Table Engine logic, direct prompt about Table Engine would be misleading since - /// SOURCES is not granted actually. In order to solve this, turn the prompt logic back to Sources. + /// As we check the SOURCES from the Table Engine logic, direct prompt about Table Engine would be misleading + /// since SOURCES is not granted actually. In order to solve this, turn the prompt logic back to Sources. if (flags & AccessType::TABLE_ENGINE && !access_control->doesTableEnginesRequireGrant()) { AccessFlags newFlags; From a2ee0668f12c8cd1b88b8c4ad46c15271a5a1fd2 Mon Sep 17 00:00:00 2001 From: jsc0218 Date: Wed, 19 Jun 2024 02:20:22 +0000 Subject: [PATCH 007/661] fix --- src/Access/ContextAccess.cpp | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) diff --git a/src/Access/ContextAccess.cpp b/src/Access/ContextAccess.cpp index de0e7e3d777..4620561053b 100644 --- a/src/Access/ContextAccess.cpp +++ b/src/Access/ContextAccess.cpp @@ -37,7 +37,7 @@ namespace ErrorCodes namespace { - static const std::vector> source_and_table_engines = { + const std::vector> source_and_table_engines = { {AccessType::FILE, "File"}, {AccessType::URL, "URL"}, {AccessType::REMOTE, "Distributed"}, @@ -268,6 +268,11 @@ namespace template std::string_view getDatabase(std::string_view arg1, const OtherArgs &...) { return arg1; } + + std::string_view getTableEngine() { return {}; } + + template + std::string_view getTableEngine(std::string_view arg1, const OtherArgs &...) 
{ return arg1; } } @@ -557,18 +562,6 @@ std::shared_ptr ContextAccess::getAccessRightsWithImplicit() return nothing_granted; } -/// Just Dummy to pass compile. -template -static std::string_view getTableEngineName(const Args &... args[[maybe_unused]]) -{ - return ""; -} - -template -static std::string_view getTableEngineName(std::string_view name, const Args &... args[[maybe_unused]]) -{ - return name; -} template bool ContextAccess::checkAccessImplHelper(AccessFlags flags, const Args &... args) const @@ -631,7 +624,7 @@ bool ContextAccess::checkAccessImplHelper(AccessFlags flags, const Args &... arg { AccessFlags newFlags; - String table_engine_name{getTableEngineName(args...)}; + String table_engine_name{getTableEngine(args...)}; for (const auto & source_and_table_engine : source_and_table_engines) { const auto & table_engine = std::get<1>(source_and_table_engine); @@ -642,7 +635,7 @@ bool ContextAccess::checkAccessImplHelper(AccessFlags flags, const Args &... arg break; } - if (newFlags == AccessType::NONE) + if (newFlags.isEmpty()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Didn't find the target Source from the Table Engine"); if (grant_option && acs->isGranted(flags, args...)) From b125f8166f32648572c2ed0d540ded56a97ac628 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Wed, 19 Jun 2024 16:41:50 +0100 Subject: [PATCH 008/661] impl --- tests/performance/array_reduce.xml | 15 ++++++--------- tests/performance/scripts/perf.py | 14 ++++++++++++-- 2 files changed, 18 insertions(+), 11 deletions(-) diff --git a/tests/performance/array_reduce.xml b/tests/performance/array_reduce.xml index 8e271c155f3..0f50eba43bf 100644 --- a/tests/performance/array_reduce.xml +++ b/tests/performance/array_reduce.xml @@ -1,11 +1,8 @@ - - - - SELECT arrayReduce('count', range(100000000)) - SELECT arrayReduce('sum', range(100000000)) - SELECT arrayReduceInRanges('count', [(1, 100000000)], range(100000000)) - SELECT arrayReduceInRanges('sum', [(1, 100000000)], range(100000000)) - SELECT arrayReduceInRanges('count', arrayZip(range(1000000), range(1000000)), range(100000000))[123456] - SELECT arrayReduceInRanges('sum', arrayZip(range(1000000), range(1000000)), range(100000000))[123456] + SELECT arrayReduce('count', range(1000000)) FROM numbers_mt(500000000) format Null + SELECT arrayReduce('sum', range(1000000)) FROM numbers_mt(500000000) format Null + SELECT arrayReduceInRanges('count', [(1, 1000000)], range(1000000)) FROM numbers_mt(500000000) format Null + SELECT arrayReduceInRanges('sum', [(1, 1000000)], range(1000000)) FROM numbers_mt(500000000) format Null + SELECT arrayReduceInRanges('count', arrayZip(range(1000000), range(1000000)), range(1000000))[123456] + SELECT arrayReduceInRanges('sum', arrayZip(range(1000000), range(1000000)), range(1000000))[123456] diff --git a/tests/performance/scripts/perf.py b/tests/performance/scripts/perf.py index 94f145d82db..f89784a0e0b 100755 --- a/tests/performance/scripts/perf.py +++ b/tests/performance/scripts/perf.py @@ -345,6 +345,18 @@ for query_index in queries_to_run: print(f"display-name\t{query_index}\t{tsv_escape(query_display_name)}") + for conn_index, c in enumerate(all_connections): + try: + c.execute("SYSTEM JEMALLOC PURGE") + + print( + f"purging jemalloc arenas\t{conn_index}\t{c.last_query.elapsed}" + ) + except KeyboardInterrupt: + raise + except: + continue + # Prewarm: run once on both servers. Helps to bring the data into memory, # precompile the queries, etc. 
# A query might not run on the old server if it uses a function added in the @@ -427,8 +439,6 @@ for query_index in queries_to_run: for conn_index, c in enumerate(this_query_connections): try: - c.execute("SYSTEM JEMALLOC PURGE") - res = c.execute( q, query_id=run_id, From 6f1f416700a32cf95af15b79543e27cbcffe2f14 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Thu, 20 Jun 2024 17:37:05 +0100 Subject: [PATCH 009/661] one more test --- tests/performance/final_big_column.xml | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/performance/final_big_column.xml b/tests/performance/final_big_column.xml index 1fd586d2d90..4bfdfdf804f 100644 --- a/tests/performance/final_big_column.xml +++ b/tests/performance/final_big_column.xml @@ -1,6 +1,7 @@ - 1 + + 8 20G @@ -10,8 +11,8 @@ PARTITION BY toYYYYMM(d) ORDER BY key - INSERT INTO optimized_select_final SELECT toDate('2000-01-01'), 2*number, randomPrintableASCII(1000) FROM numbers(5000000) - INSERT INTO optimized_select_final SELECT toDate('2020-01-01'), 2*number+1, randomPrintableASCII(1000) FROM numbers(5000000) + INSERT INTO optimized_select_final SELECT toDate('2000-01-01'), 2*number, randomPrintableASCII(1000) FROM numbers_mt(5000000) + INSERT INTO optimized_select_final SELECT toDate('2020-01-01'), 2*number+1, randomPrintableASCII(1000) FROM numbers_mt(5000000) SELECT * FROM optimized_select_final FINAL FORMAT Null SETTINGS max_threads = 8 SELECT * FROM optimized_select_final FINAL WHERE key % 10 = 0 FORMAT Null From 91dc9a69d844d781c7d4f94ca01d0a9bbe1a1f29 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Fri, 21 Jun 2024 22:03:38 +0100 Subject: [PATCH 010/661] fix final_big_column --- tests/performance/final_big_column.xml | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/tests/performance/final_big_column.xml b/tests/performance/final_big_column.xml index 4bfdfdf804f..bc7c3570db6 100644 --- a/tests/performance/final_big_column.xml +++ b/tests/performance/final_big_column.xml @@ -1,7 +1,6 @@ - - 8 + 1 20G @@ -11,8 +10,8 @@ PARTITION BY toYYYYMM(d) ORDER BY key - INSERT INTO optimized_select_final SELECT toDate('2000-01-01'), 2*number, randomPrintableASCII(1000) FROM numbers_mt(5000000) - INSERT INTO optimized_select_final SELECT toDate('2020-01-01'), 2*number+1, randomPrintableASCII(1000) FROM numbers_mt(5000000) + INSERT INTO optimized_select_final SELECT toDate('2000-01-01'), 2*number, randomPrintableASCII(1000) FROM numbers(1000000) + INSERT INTO optimized_select_final SELECT toDate('2020-01-01'), 2*number+1, randomPrintableASCII(1000) FROM numbers(1000000) SELECT * FROM optimized_select_final FINAL FORMAT Null SETTINGS max_threads = 8 SELECT * FROM optimized_select_final FINAL WHERE key % 10 = 0 FORMAT Null From cb0a692ba061d910712f0144cb2b5308db5d033f Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Fri, 21 Jun 2024 23:42:56 +0100 Subject: [PATCH 011/661] fix read_from_comp_parts --- tests/performance/read_from_comp_parts.xml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/performance/read_from_comp_parts.xml b/tests/performance/read_from_comp_parts.xml index a625075588a..055df51d12d 100644 --- a/tests/performance/read_from_comp_parts.xml +++ b/tests/performance/read_from_comp_parts.xml @@ -5,15 +5,16 @@ ORDER BY (c1, c2) SETTINGS min_rows_for_wide_part = 1000000000 AS SELECT * - FROM generateRandom('c1 UInt32, c2 UInt64, s1 String, arr1 Array(UInt32), c3 UInt64, s2 String', 0, 30, 30) + FROM generateRandom('c1 UInt32, c2 UInt64, s1 String, arr1 
Array(UInt32), c3 UInt64, s2 String', 0, 5, 6) LIMIT 50000000 + SETTINGS max_insert_threads = 8 8 - SELECT count() FROM mt_comp_parts WHERE NOT ignore(c1) + SELECT count() FROM mt_comp_parts WHERE NOT ignore(s1) SELECT count() FROM mt_comp_parts WHERE NOT ignore(c2, s1, arr1, s2) SELECT count() FROM mt_comp_parts WHERE NOT ignore(c1, s1, c3) SELECT count() FROM mt_comp_parts WHERE NOT ignore(c1, c2, c3) From 051290e6c912108986c896916db087c71230a121 Mon Sep 17 00:00:00 2001 From: serxa Date: Mon, 24 Jun 2024 12:26:46 +0000 Subject: [PATCH 012/661] Add throughput introspection for scheduler queues --- src/Common/EventRateMeter.h | 18 ++++++++++++++++-- src/Common/Scheduler/ISchedulerNode.h | 14 ++++++++++++++ src/Common/Scheduler/Nodes/FairPolicy.h | 3 +-- src/Common/Scheduler/Nodes/FifoQueue.h | 3 +-- src/Common/Scheduler/Nodes/PriorityPolicy.h | 3 +-- .../Scheduler/Nodes/SemaphoreConstraint.h | 3 +-- .../Scheduler/Nodes/ThrottlerConstraint.h | 3 +-- src/Common/Scheduler/SchedulerRoot.h | 3 +-- src/Storages/System/StorageSystemScheduler.cpp | 2 ++ 9 files changed, 38 insertions(+), 14 deletions(-) diff --git a/src/Common/EventRateMeter.h b/src/Common/EventRateMeter.h index 3a21a80ce8b..4c38d1d9371 100644 --- a/src/Common/EventRateMeter.h +++ b/src/Common/EventRateMeter.h @@ -14,8 +14,9 @@ namespace DB class EventRateMeter { public: - explicit EventRateMeter(double now, double period_) + explicit EventRateMeter(double now, double period_, double step_ = 0.0) : period(period_) + , step(step_) , half_decay_time(period * std::numbers::ln2) // for `ExponentiallySmoothedAverage::sumWeights()` to be equal to `1/period` { reset(now); @@ -38,7 +39,16 @@ public: if (now - period <= start) // precise counting mode events = ExponentiallySmoothedAverage(events.value + count, now); else // exponential smoothing mode - events.add(count, now, half_decay_time); + { + // Adding events too often lead to low precision due to smoothing too often, so we buffer new events and add them in steps + step_count += count; + if (step_start + step <= now) + { + events.add(step_count, now, half_decay_time); + step_start = now; + step_count = 0; + } + } } /// Compute average event rate throughout `[now - period, now]` period. @@ -58,16 +68,20 @@ public: void reset(double now) { start = now; + step_start = now; events = ExponentiallySmoothedAverage(); data_points = 0; } private: const double period; + const double step; // duration of a step const double half_decay_time; double start; // Instant in past without events before it; when measurement started or reset ExponentiallySmoothedAverage events; // Estimated number of events in the last `period` size_t data_points = 0; + double step_start; // start instant of the last step + double step_count = 0.0; // number of events accumulated since step start }; } diff --git a/src/Common/Scheduler/ISchedulerNode.h b/src/Common/Scheduler/ISchedulerNode.h index 81b491b0eda..c051829e336 100644 --- a/src/Common/Scheduler/ISchedulerNode.h +++ b/src/Common/Scheduler/ISchedulerNode.h @@ -3,6 +3,8 @@ #include #include #include +#include +#include #include #include @@ -176,6 +178,14 @@ protected: /// Postponed to be handled in scheduler thread, so it is intended to be called from outside. 
void scheduleActivation(); + /// Helper for introspection metrics + void incrementDequeued(ResourceCost cost) + { + dequeued_requests++; + dequeued_cost += cost; + throughput.add(static_cast(clock_gettime_ns())/1e9, cost); + } + public: EventQueue * const event_queue; String basename; @@ -189,6 +199,10 @@ public: std::atomic dequeued_cost{0}; std::atomic canceled_cost{0}; std::atomic busy_periods{0}; + + /// Average dequeued_cost per second + /// WARNING: Should only be accessed from the scheduler thread, so that locking is not required + EventRateMeter throughput{static_cast(clock_gettime_ns())/1e9, 2, 1}; }; using SchedulerNodePtr = std::shared_ptr; diff --git a/src/Common/Scheduler/Nodes/FairPolicy.h b/src/Common/Scheduler/Nodes/FairPolicy.h index 0a4e55c253b..fba637e979e 100644 --- a/src/Common/Scheduler/Nodes/FairPolicy.h +++ b/src/Common/Scheduler/Nodes/FairPolicy.h @@ -188,8 +188,7 @@ public: if (request) { - dequeued_requests++; - dequeued_cost += request->cost; + incrementDequeued(request->cost); return {request, heap_size > 0}; } } diff --git a/src/Common/Scheduler/Nodes/FifoQueue.h b/src/Common/Scheduler/Nodes/FifoQueue.h index 9ec997c06d2..9fbc6d1ae65 100644 --- a/src/Common/Scheduler/Nodes/FifoQueue.h +++ b/src/Common/Scheduler/Nodes/FifoQueue.h @@ -59,8 +59,7 @@ public: if (requests.empty()) busy_periods++; queue_cost -= result->cost; - dequeued_requests++; - dequeued_cost += result->cost; + incrementDequeued(result->cost); return {result, !requests.empty()}; } diff --git a/src/Common/Scheduler/Nodes/PriorityPolicy.h b/src/Common/Scheduler/Nodes/PriorityPolicy.h index 22a5155cfeb..91dc95600d5 100644 --- a/src/Common/Scheduler/Nodes/PriorityPolicy.h +++ b/src/Common/Scheduler/Nodes/PriorityPolicy.h @@ -122,8 +122,7 @@ public: if (request) { - dequeued_requests++; - dequeued_cost += request->cost; + incrementDequeued(request->cost); return {request, !items.empty()}; } } diff --git a/src/Common/Scheduler/Nodes/SemaphoreConstraint.h b/src/Common/Scheduler/Nodes/SemaphoreConstraint.h index 10fce536f5d..92c6af9db18 100644 --- a/src/Common/Scheduler/Nodes/SemaphoreConstraint.h +++ b/src/Common/Scheduler/Nodes/SemaphoreConstraint.h @@ -81,8 +81,7 @@ public: child_active = child_now_active; if (!active()) busy_periods++; - dequeued_requests++; - dequeued_cost += request->cost; + incrementDequeued(request->cost); return {request, active()}; } diff --git a/src/Common/Scheduler/Nodes/ThrottlerConstraint.h b/src/Common/Scheduler/Nodes/ThrottlerConstraint.h index f4a5795bb2b..56866336f50 100644 --- a/src/Common/Scheduler/Nodes/ThrottlerConstraint.h +++ b/src/Common/Scheduler/Nodes/ThrottlerConstraint.h @@ -89,8 +89,7 @@ public: child_active = child_now_active; if (!active()) busy_periods++; - dequeued_requests++; - dequeued_cost += request->cost; + incrementDequeued(request->cost); return {request, active()}; } diff --git a/src/Common/Scheduler/SchedulerRoot.h b/src/Common/Scheduler/SchedulerRoot.h index 7af42fdbbea..5307aadc3cc 100644 --- a/src/Common/Scheduler/SchedulerRoot.h +++ b/src/Common/Scheduler/SchedulerRoot.h @@ -162,8 +162,7 @@ public: if (request == nullptr) // Possible in case of request cancel, just retry continue; - dequeued_requests++; - dequeued_cost += request->cost; + incrementDequeued(request->cost); return {request, current != nullptr}; } } diff --git a/src/Storages/System/StorageSystemScheduler.cpp b/src/Storages/System/StorageSystemScheduler.cpp index 339a59e88a5..b42c807d6fc 100644 --- a/src/Storages/System/StorageSystemScheduler.cpp +++ 
b/src/Storages/System/StorageSystemScheduler.cpp @@ -31,6 +31,7 @@ ColumnsDescription StorageSystemScheduler::getColumnsDescription() {"dequeued_requests", std::make_shared(), "The total number of resource requests dequeued from this node."}, {"canceled_requests", std::make_shared(), "The total number of resource requests canceled from this node."}, {"dequeued_cost", std::make_shared(), "The sum of costs (e.g. size in bytes) of all requests dequeued from this node."}, + {"throughput", std::make_shared(), "Current average throughput (dequeued cost per second)."}, {"canceled_cost", std::make_shared(), "The sum of costs (e.g. size in bytes) of all requests canceled from this node."}, {"busy_periods", std::make_shared(), "The total number of deactivations of this node."}, {"vruntime", std::make_shared(std::make_shared()), @@ -96,6 +97,7 @@ void StorageSystemScheduler::fillData(MutableColumns & res_columns, ContextPtr c res_columns[i++]->insert(node->dequeued_requests.load()); res_columns[i++]->insert(node->canceled_requests.load()); res_columns[i++]->insert(node->dequeued_cost.load()); + res_columns[i++]->insert(node->throughput.rate(static_cast(clock_gettime_ns())/1e9)); res_columns[i++]->insert(node->canceled_cost.load()); res_columns[i++]->insert(node->busy_periods.load()); From b0ac0327d4d38a918e9be8fc499038cc43cafb2c Mon Sep 17 00:00:00 2001 From: serxa Date: Tue, 25 Jun 2024 11:48:29 +0000 Subject: [PATCH 013/661] Fix bug in EventRateMeter It was relying on ExponentiallySmoothedCounter::get() which is designed for specific 1 second time interval between points. Now sum of weights is computed separatly in `duration` field, giving very accurate measurements independent of interval. --- src/Common/EventRateMeter.h | 52 ++++++---------- src/Common/ProgressIndication.h | 2 +- src/Common/tests/gtest_event_rate_meter.cpp | 68 +++++++++++++++++++++ 3 files changed, 86 insertions(+), 36 deletions(-) create mode 100644 src/Common/tests/gtest_event_rate_meter.cpp diff --git a/src/Common/EventRateMeter.h b/src/Common/EventRateMeter.h index 4c38d1d9371..b8a9112428f 100644 --- a/src/Common/EventRateMeter.h +++ b/src/Common/EventRateMeter.h @@ -4,8 +4,6 @@ #include -#include - namespace DB { @@ -14,10 +12,10 @@ namespace DB class EventRateMeter { public: - explicit EventRateMeter(double now, double period_, double step_ = 0.0) + explicit EventRateMeter(double now, double period_, size_t heating_ = 0) : period(period_) - , step(step_) - , half_decay_time(period * std::numbers::ln2) // for `ExponentiallySmoothedAverage::sumWeights()` to be equal to `1/period` + , max_interval(period * 10) + , heating(heating_) { reset(now); } @@ -30,25 +28,11 @@ public: { // Remove data for initial heating stage that can present at the beginning of a query. // Otherwise it leads to wrong gradual increase of average value, turning algorithm into not very reactive. 
- if (count != 0.0 && ++data_points < 5) - { - start = events.time; - events = ExponentiallySmoothedAverage(); - } + if (count != 0.0 && data_points++ <= heating) + reset(events.time, data_points); - if (now - period <= start) // precise counting mode - events = ExponentiallySmoothedAverage(events.value + count, now); - else // exponential smoothing mode - { - // Adding events too often lead to low precision due to smoothing too often, so we buffer new events and add them in steps - step_count += count; - if (step_start + step <= now) - { - events.add(step_count, now, half_decay_time); - step_start = now; - step_count = 0; - } - } + duration.add(std::min(max_interval, now - duration.time), now, period); + events.add(count, now, period); } /// Compute average event rate throughout `[now - period, now]` period. @@ -59,29 +43,27 @@ public: add(now, 0); if (unlikely(now <= start)) return 0; - if (now - period <= start) // precise counting mode - return events.value / (now - start); - else // exponential smoothing mode - return events.get(half_decay_time); // equals to `events.value / period` + + // We do not use .get() because sum of weights will anyway be canceled out (optimization) + return events.value / duration.value; } - void reset(double now) + void reset(double now, size_t data_points_ = 0) { start = now; - step_start = now; events = ExponentiallySmoothedAverage(); - data_points = 0; + duration = ExponentiallySmoothedAverage(); + data_points = data_points_; } private: const double period; - const double step; // duration of a step - const double half_decay_time; + const double max_interval; + const size_t heating; double start; // Instant in past without events before it; when measurement started or reset - ExponentiallySmoothedAverage events; // Estimated number of events in the last `period` + ExponentiallySmoothedAverage duration; // Current duration of a period + ExponentiallySmoothedAverage events; // Estimated number of events in last `duration` seconds size_t data_points = 0; - double step_start; // start instant of the last step - double step_count = 0.0; // number of events accumulated since step start }; } diff --git a/src/Common/ProgressIndication.h b/src/Common/ProgressIndication.h index a9965785889..d925077a072 100644 --- a/src/Common/ProgressIndication.h +++ b/src/Common/ProgressIndication.h @@ -91,7 +91,7 @@ private: bool write_progress_on_update = false; - EventRateMeter cpu_usage_meter{static_cast(clock_gettime_ns()), 2'000'000'000 /*ns*/}; // average cpu utilization last 2 second + EventRateMeter cpu_usage_meter{static_cast(clock_gettime_ns()), 2'000'000'000 /*ns*/, 4}; // average cpu utilization last 2 second, skip first 4 points HostToTimesMap hosts_data; /// In case of all of the above: /// - clickhouse-local diff --git a/src/Common/tests/gtest_event_rate_meter.cpp b/src/Common/tests/gtest_event_rate_meter.cpp new file mode 100644 index 00000000000..91ceec5eef7 --- /dev/null +++ b/src/Common/tests/gtest_event_rate_meter.cpp @@ -0,0 +1,68 @@ +#include + +#include + +#include + + +TEST(EventRateMeter, ExponentiallySmoothedAverage) +{ + double target = 100.0; + + // The test is only correct for timestep of 1 second because of + // how sum of weights is implemented inside `ExponentiallySmoothedAverage` + double time_step = 1.0; + + for (double half_decay_time : { 0.1, 1.0, 10.0, 100.0}) + { + DB::ExponentiallySmoothedAverage esa; + + int steps = static_cast(half_decay_time * 30 / time_step); + for (int i = 1; i <= steps; ++i) + esa.add(target * time_step, i * 
time_step, half_decay_time); + double measured = esa.get(half_decay_time); + ASSERT_LE(std::fabs(measured - target), 1e-5 * target); + } +} + +TEST(EventRateMeter, ConstantRate) +{ + double target = 100.0; + + for (double period : {0.1, 1.0, 10.0}) + { + for (double time_step : {0.001, 0.01, 0.1, 1.0}) + { + DB::EventRateMeter erm(0.0, period); + + int steps = static_cast(period * 30 / time_step); + for (int i = 1; i <= steps; ++i) + erm.add(i * time_step, target * time_step); + double measured = erm.rate(steps * time_step); + // std::cout << "T=" << period << " dt=" << time_step << " measured=" << measured << std::endl; + ASSERT_LE(std::fabs(measured - target), 1e-5 * target); + } + } +} + +TEST(EventRateMeter, PreciseStart) +{ + double target = 100.0; + + for (double period : {0.1, 1.0, 10.0}) + { + for (double time_step : {0.001, 0.01, 0.1, 1.0}) + { + DB::EventRateMeter erm(0.0, period); + + int steps = static_cast(period / time_step); + for (int i = 1; i <= steps; ++i) + { + erm.add(i * time_step, target * time_step); + double measured = erm.rate(i * time_step); + // std::cout << "T=" << period << " dt=" << time_step << " measured=" << measured << std::endl; + ASSERT_LE(std::fabs(measured - target), 1e-5 * target); + } + } + } +} From 5d9d29e37086e19dba852097e7820aff83072a8d Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 25 Jun 2024 15:00:07 +0000 Subject: [PATCH 014/661] Fixing some crashes --- src/Interpreters/ActionsDAG.cpp | 15 ++++++++----- src/Interpreters/ActionsDAG.h | 5 +++-- src/Interpreters/ExpressionActions.cpp | 8 ++++--- src/Interpreters/ExpressionAnalyzer.cpp | 4 ++-- src/Interpreters/InterpreterSelectQuery.cpp | 14 ++++++------ src/Interpreters/MutationsInterpreter.cpp | 4 ++-- src/Planner/Planner.cpp | 18 ++++++++------- src/Planner/PlannerJoinTree.cpp | 22 +++++++++---------- src/Planner/PlannerJoins.cpp | 4 ++-- src/Planner/PlannerWindowFunctions.cpp | 4 +++- src/Processors/QueryPlan/ExpressionStep.cpp | 8 +++---- src/Processors/QueryPlan/FilterStep.cpp | 6 ++--- .../Optimizations/distinctReadInOrder.cpp | 4 ++-- .../Optimizations/filterPushDown.cpp | 4 ++-- .../QueryPlan/Optimizations/liftUpUnion.cpp | 2 +- .../Optimizations/optimizePrewhere.cpp | 2 +- .../optimizePrimaryKeyCondition.cpp | 6 ++--- .../Optimizations/optimizeReadInOrder.cpp | 8 +++---- .../optimizeUseAggregateProjection.cpp | 2 +- .../Optimizations/projectionsCommon.cpp | 6 ++--- .../Optimizations/removeRedundantDistinct.cpp | 4 ++-- .../QueryPlan/ReadFromMergeTree.cpp | 16 +++++++------- .../QueryPlan/SourceStepWithFilter.cpp | 8 +++---- src/Processors/QueryPlan/TotalsHavingStep.cpp | 8 +++---- .../Transforms/FillingTransform.cpp | 2 +- src/Storages/MergeTree/MergeTreeIndexSet.cpp | 4 ++-- .../MergeTree/MergeTreeSelectProcessor.cpp | 4 ++-- src/Storages/SelectQueryInfo.h | 4 ++-- src/Storages/StorageBuffer.cpp | 8 +++---- src/Storages/StorageMerge.cpp | 6 ++--- src/Storages/VirtualColumnUtils.cpp | 4 ++-- 31 files changed, 112 insertions(+), 102 deletions(-) diff --git a/src/Interpreters/ActionsDAG.cpp b/src/Interpreters/ActionsDAG.cpp index 23e1e5ee152..c2626285235 100644 --- a/src/Interpreters/ActionsDAG.cpp +++ b/src/Interpreters/ActionsDAG.cpp @@ -1246,17 +1246,20 @@ bool ActionsDAG::removeUnusedResult(const std::string & column_name) return true; } -ActionsDAGPtr ActionsDAG::clone() const +ActionsDAGPtr ActionsDAG::clone(const ActionsDAG * from) { std::unordered_map old_to_new_nodes; - return clone(old_to_new_nodes); + return ActionsDAG::clone(from, old_to_new_nodes); } 
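The clone() rewrite above replaces the const member function with static helpers, so a caller holding a raw pointer (possibly null) or a smart pointer can clone without dereferencing first; null simply propagates. A condensed sketch of the same null-tolerant, node-remapping idea, using simplified stand-in types rather than the real ActionsDAG (the actual remapping of children, outputs and inputs is in the hunk that follows):

    #include <list>
    #include <memory>
    #include <unordered_map>
    #include <vector>

    struct Dag
    {
        struct Node { std::vector<const Node *> children; };
        std::list<Node> nodes;  // stable node addresses, like the real DAG

        // Static clone: tolerates nullptr and rewires child pointers into the copy.
        static std::unique_ptr<Dag> clone(const Dag * from)
        {
            if (!from)
                return nullptr;
            auto copy = std::make_unique<Dag>();
            std::unordered_map<const Node *, Node *> remap;
            for (const auto & node : from->nodes)
                remap[&node] = &copy->nodes.emplace_back(node);
            for (auto & node : copy->nodes)
                for (auto & child : node.children)
                    child = remap[child];
            return copy;
        }
    };
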
-ActionsDAGPtr ActionsDAG::clone(std::unordered_map<const Node *, Node *> & old_to_new_nodes) const
+ActionsDAGPtr ActionsDAG::clone(const ActionsDAG * from, std::unordered_map<const Node *, Node *> & old_to_new_nodes)
 {
+    if (!from)
+        return nullptr;
+
     auto actions = std::make_unique<ActionsDAG>();
 
-    for (const auto & node : nodes)
+    for (const auto & node : from->nodes)
     {
         auto & copy_node = actions->nodes.emplace_back(node);
         old_to_new_nodes[&node] = &copy_node;
@@ -1266,10 +1269,10 @@ ActionsDAGPtr ActionsDAG::clone(std::unordered_map & old_t
     for (auto & child : node.children)
         child = old_to_new_nodes[child];
 
-    for (const auto & output_node : outputs)
+    for (const auto & output_node : from->outputs)
         actions->outputs.push_back(old_to_new_nodes[output_node]);
 
-    for (const auto & input_node : inputs)
+    for (const auto & input_node : from->inputs)
         actions->inputs.push_back(old_to_new_nodes[input_node]);
 
     return actions;
diff --git a/src/Interpreters/ActionsDAG.h b/src/Interpreters/ActionsDAG.h
index 4a840885b6a..7ca3d1c1b0d 100644
--- a/src/Interpreters/ActionsDAG.h
+++ b/src/Interpreters/ActionsDAG.h
@@ -261,8 +261,9 @@ public:
     void compileExpressions(size_t min_count_to_compile_expression, const std::unordered_set<const Node *> & lazy_executed_nodes = {});
 #endif
 
-    ActionsDAGPtr clone() const;
-    ActionsDAGPtr clone(std::unordered_map<const Node *, Node *> & old_to_new_nodes) const;
+    static ActionsDAGPtr clone(const ActionsDAGPtr & from) { return clone(from.get()); }
+    static ActionsDAGPtr clone(const ActionsDAG * from);
+    static ActionsDAGPtr clone(const ActionsDAG * from, std::unordered_map<const Node *, Node *> & old_to_new_nodes);
 
     static ActionsDAGPtr cloneSubDAG(const NodeRawConstPtrs & outputs, bool remove_aliases);
 
diff --git a/src/Interpreters/ExpressionActions.cpp b/src/Interpreters/ExpressionActions.cpp
index 7cbf5afd763..2eca31fc75e 100644
--- a/src/Interpreters/ExpressionActions.cpp
+++ b/src/Interpreters/ExpressionActions.cpp
@@ -53,7 +53,7 @@ ExpressionActions::ExpressionActions(ActionsDAGPtr actions_dag_, const Expressio
     : project_inputs(project_inputs_)
     , settings(settings_)
 {
-    actions_dag = actions_dag_->clone();
+    actions_dag = ActionsDAG::clone(actions_dag_);
 
     /// It's important to determine lazy executed nodes before compiling expressions.
std::unordered_set lazy_executed_nodes = processShortCircuitFunctions(*actions_dag, settings.short_circuit_function_evaluation); @@ -76,15 +76,17 @@ ExpressionActionsPtr ExpressionActions::clone() const auto copy = std::make_shared(ExpressionActions()); std::unordered_map copy_map; - copy->actions_dag = actions_dag->clone(copy_map); + copy->actions_dag = ActionsDAG::clone(actions_dag.get(), copy_map); copy->actions = actions; for (auto & action : copy->actions) action.node = copy_map[action.node]; + for (const auto * input : copy->actions_dag->getInputs()) + copy->input_positions.emplace(input->result_name, input_positions.at(input->result_name)); + copy->num_columns = num_columns; copy->required_columns = required_columns; - copy->input_positions = input_positions; copy->result_positions = result_positions; copy->sample_block = sample_block; diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index be00e37c751..6b49365b492 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -1944,7 +1944,7 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( Block before_prewhere_sample = source_header; if (sanitizeBlock(before_prewhere_sample)) { - auto dag = prewhere_dag_and_flags->dag.clone(); + auto dag = ActionsDAG::clone(&prewhere_dag_and_flags->dag); ExpressionActions( std::move(dag), ExpressionActionsSettings::fromSettings(context->getSettingsRef())).execute(before_prewhere_sample); @@ -1980,7 +1980,7 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( if (sanitizeBlock(before_where_sample)) { ExpressionActions( - before_where->dag.clone(), + ActionsDAG::clone(&before_where->dag), ExpressionActionsSettings::fromSettings(context->getSettingsRef())).execute(before_where_sample); auto & column_elem diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 64a17a7ba87..71eb7dc64f8 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -1300,7 +1300,7 @@ static InterpolateDescriptionPtr getInterpolateDescription( ActionsDAGPtr actions = analyzer.getActionsDAG(true); ActionsDAGPtr conv_dag = ActionsDAG::makeConvertingActions(actions->getResultColumns(), result_columns, ActionsDAG::MatchColumnsMode::Position, true); - ActionsDAGPtr merge_dag = ActionsDAG::merge(std::move(*actions->clone()), std::move(*conv_dag)); + ActionsDAGPtr merge_dag = ActionsDAG::merge(std::move(* ActionsDAG::clone(actions)), std::move(*conv_dag)); interpolate_descr = std::make_shared(std::move(merge_dag), aliases); } @@ -2042,7 +2042,7 @@ void InterpreterSelectQuery::addEmptySourceToQueryPlan(QueryPlan & query_plan, c pipe.addSimpleTransform([&](const Block & header) { return std::make_shared(header, - std::make_shared(prewhere_info.row_level_filter->clone()), + std::make_shared(ActionsDAG::clone(prewhere_info.row_level_filter)), prewhere_info.row_level_column_name, true); }); } @@ -2050,7 +2050,7 @@ void InterpreterSelectQuery::addEmptySourceToQueryPlan(QueryPlan & query_plan, c pipe.addSimpleTransform([&](const Block & header) { return std::make_shared( - header, std::make_shared(prewhere_info.prewhere_actions->clone()), + header, std::make_shared(ActionsDAG::clone(prewhere_info.prewhere_actions)), prewhere_info.prewhere_column_name, prewhere_info.remove_prewhere_column); }); } @@ -2578,7 +2578,7 @@ void InterpreterSelectQuery::executeFetchColumns(QueryProcessingStage::Enum proc void 
InterpreterSelectQuery::executeWhere(QueryPlan & query_plan, const ActionsAndProjectInputsFlagPtr & expression, bool remove_filter) { - auto dag = expression->dag.clone(); + auto dag = ActionsDAG::clone(&expression->dag); if (expression->project_input) dag->appendInputsForUnusedColumns(query_plan.getCurrentDataStream().header); @@ -2752,7 +2752,7 @@ void InterpreterSelectQuery::executeMergeAggregated(QueryPlan & query_plan, bool void InterpreterSelectQuery::executeHaving(QueryPlan & query_plan, const ActionsAndProjectInputsFlagPtr & expression, bool remove_filter) { - auto dag = expression->dag.clone(); + auto dag = ActionsDAG::clone(&expression->dag); if (expression->project_input) dag->appendInputsForUnusedColumns(query_plan.getCurrentDataStream().header); @@ -2770,7 +2770,7 @@ void InterpreterSelectQuery::executeTotalsAndHaving( ActionsDAGPtr dag; if (expression) { - dag = expression->dag.clone(); + dag = ActionsDAG::clone(&expression->dag); if (expression->project_input) dag->appendInputsForUnusedColumns(query_plan.getCurrentDataStream().header); } @@ -2819,7 +2819,7 @@ void InterpreterSelectQuery::executeExpression(QueryPlan & query_plan, const Act if (!expression) return; - auto dag = expression->dag.clone(); + auto dag = ActionsDAG::clone(&expression->dag); if (expression->project_input) dag->appendInputsForUnusedColumns(query_plan.getCurrentDataStream().header); diff --git a/src/Interpreters/MutationsInterpreter.cpp b/src/Interpreters/MutationsInterpreter.cpp index 1bb770bf561..704c5ce7d8b 100644 --- a/src/Interpreters/MutationsInterpreter.cpp +++ b/src/Interpreters/MutationsInterpreter.cpp @@ -1278,7 +1278,7 @@ QueryPipelineBuilder MutationsInterpreter::addStreamsForLaterStages(const std::v if (i < stage.filter_column_names.size()) { - auto dag = step->actions()->dag.clone(); + auto dag = ActionsDAG::clone(&step->actions()->dag); if (step->actions()->project_input) dag->appendInputsForUnusedColumns(plan.getCurrentDataStream().header); /// Execute DELETEs. @@ -1286,7 +1286,7 @@ QueryPipelineBuilder MutationsInterpreter::addStreamsForLaterStages(const std::v } else { - auto dag = step->actions()->dag.clone(); + auto dag = ActionsDAG::clone(&step->actions()->dag); if (step->actions()->project_input) dag->appendInputsForUnusedColumns(plan.getCurrentDataStream().header); /// Execute UPDATE or final projection. 
diff --git a/src/Planner/Planner.cpp b/src/Planner/Planner.cpp index 681ae7e6ac4..dddb7531519 100644 --- a/src/Planner/Planner.cpp +++ b/src/Planner/Planner.cpp @@ -333,12 +333,12 @@ void addExpressionStep(QueryPlan & query_plan, const std::string & step_description, std::vector & result_actions_to_execute) { - auto actions = expression_actions->dag.clone(); + auto actions = ActionsDAG::clone(&expression_actions->dag); if (expression_actions->project_input) actions->appendInputsForUnusedColumns(query_plan.getCurrentDataStream().header); - result_actions_to_execute.push_back(actions.get()); auto expression_step = std::make_unique(query_plan.getCurrentDataStream(), actions); + result_actions_to_execute.push_back(expression_step->getExpression().get()); expression_step->setStepDescription(step_description); query_plan.addStep(std::move(expression_step)); } @@ -348,15 +348,15 @@ void addFilterStep(QueryPlan & query_plan, const std::string & step_description, std::vector & result_actions_to_execute) { - auto actions = filter_analysis_result.filter_actions->dag.clone(); + auto actions = ActionsDAG::clone(&filter_analysis_result.filter_actions->dag); if (filter_analysis_result.filter_actions->project_input) actions->appendInputsForUnusedColumns(query_plan.getCurrentDataStream().header); - result_actions_to_execute.push_back(actions.get()); auto where_step = std::make_unique(query_plan.getCurrentDataStream(), actions, filter_analysis_result.filter_column_name, filter_analysis_result.remove_filter_column); + result_actions_to_execute.push_back(where_step->getExpression().get()); where_step->setStepDescription(step_description); query_plan.addStep(std::move(where_step)); } @@ -556,11 +556,9 @@ void addTotalsHavingStep(QueryPlan & query_plan, ActionsDAGPtr actions; if (having_analysis_result.filter_actions) { - actions = having_analysis_result.filter_actions->dag.clone(); + actions = ActionsDAG::clone(&having_analysis_result.filter_actions->dag); if (having_analysis_result.filter_actions->project_input) actions->appendInputsForUnusedColumns(query_plan.getCurrentDataStream().header); - - result_actions_to_execute.push_back(actions.get()); } auto totals_having_step = std::make_unique( @@ -573,6 +571,10 @@ void addTotalsHavingStep(QueryPlan & query_plan, settings.totals_mode, settings.totals_auto_threshold, need_finalize); + + if (having_analysis_result.filter_actions) + result_actions_to_execute.push_back(totals_having_step->getActions().get()); + query_plan.addStep(std::move(totals_having_step)); } @@ -1449,7 +1451,7 @@ void Planner::buildPlanForQueryNode() if (it != table_filters.end()) { const auto & filters = it->second; - table_expression_data.setFilterActions(filters.filter_actions->clone()); + table_expression_data.setFilterActions(ActionsDAG::clone(filters.filter_actions)); table_expression_data.setPrewhereInfo(filters.prewhere_info); } } diff --git a/src/Planner/PlannerJoinTree.cpp b/src/Planner/PlannerJoinTree.cpp index 918cfad703e..16b5e363bfd 100644 --- a/src/Planner/PlannerJoinTree.cpp +++ b/src/Planner/PlannerJoinTree.cpp @@ -646,7 +646,7 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres auto table_expression_query_info = select_query_info; table_expression_query_info.table_expression = table_expression; - table_expression_query_info.filter_actions_dag = table_expression_data.getFilterActions()->clone(); + table_expression_query_info.filter_actions_dag = ActionsDAG::clone(table_expression_data.getFilterActions()); 
table_expression_query_info.analyzer_can_use_parallel_replicas_on_follower = table_node == planner_context->getGlobalPlannerContext()->parallel_replicas_table; size_t max_streams = settings.max_threads; @@ -776,7 +776,7 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres if (prewhere_actions) { prewhere_info = std::make_shared(); - prewhere_info->prewhere_actions = prewhere_actions->clone(); + prewhere_info->prewhere_actions = ActionsDAG::clone(prewhere_actions); prewhere_info->prewhere_column_name = prewhere_actions->getOutputs().at(0)->result_name; prewhere_info->remove_prewhere_column = true; prewhere_info->need_filter = true; @@ -831,7 +831,7 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres auto row_policy_filter_info = buildRowPolicyFilterIfNeeded(storage, table_expression_query_info, planner_context, used_row_policies); if (row_policy_filter_info.actions) - table_expression_data.setRowLevelFilterActions(row_policy_filter_info.actions->clone()); + table_expression_data.setRowLevelFilterActions(ActionsDAG::clone(row_policy_filter_info.actions)); add_filter(row_policy_filter_info, "Row-level security filter"); if (query_context->getParallelReplicasMode() == Context::ParallelReplicasMode::CUSTOM_KEY) @@ -1178,17 +1178,16 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_ join_table_expression, planner_context); - left_join_tree_query_plan.actions_dags.emplace_back(join_clauses_and_actions.left_join_expressions_actions.get()); - left_join_tree_query_plan.actions_dags.emplace_back(join_clauses_and_actions.right_join_expressions_actions.get()); - join_clauses_and_actions.left_join_expressions_actions->appendInputsForUnusedColumns(left_plan.getCurrentDataStream().header); auto left_join_expressions_actions_step = std::make_unique(left_plan.getCurrentDataStream(), join_clauses_and_actions.left_join_expressions_actions); left_join_expressions_actions_step->setStepDescription("JOIN actions"); + left_join_tree_query_plan.actions_dags.emplace_back(left_join_expressions_actions_step->getExpression().get()); left_plan.addStep(std::move(left_join_expressions_actions_step)); join_clauses_and_actions.right_join_expressions_actions->appendInputsForUnusedColumns(right_plan.getCurrentDataStream().header); auto right_join_expressions_actions_step = std::make_unique(right_plan.getCurrentDataStream(), join_clauses_and_actions.right_join_expressions_actions); right_join_expressions_actions_step->setStepDescription("JOIN actions"); + right_join_tree_query_plan.actions_dags.emplace_back(right_join_expressions_actions_step->getExpression().get()); right_plan.addStep(std::move(right_join_expressions_actions_step)); } @@ -1434,7 +1433,8 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_ auto result_plan = QueryPlan(); - if (join_algorithm->isFilled()) + bool is_filled_join = join_algorithm->isFilled(); + if (is_filled_join) { auto filled_join_step = std::make_unique( left_plan.getCurrentDataStream(), @@ -1586,8 +1586,9 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_ left_join_tree_query_plan.used_row_policies.insert(right_join_tree_query_plan_row_policy); /// Collect all required actions dags in `left_join_tree_query_plan.actions_dags` - for (const auto * action_dag : right_join_tree_query_plan.actions_dags) - left_join_tree_query_plan.actions_dags.emplace_back(action_dag); + if (!is_filled_join) + for (const auto * action_dag : 
right_join_tree_query_plan.actions_dags) + left_join_tree_query_plan.actions_dags.emplace_back(action_dag); // if (join_clauses_and_actions.left_join_expressions_actions) // left_join_tree_query_plan.actions_dags.emplace_back(join_clauses_and_actions.left_join_expressions_actions.get()); // if (join_clauses_and_actions.right_join_expressions_actions) @@ -1646,10 +1647,9 @@ JoinTreeQueryPlan buildQueryPlanForArrayJoinNode(const QueryTreeNodePtr & array_ array_join_action_dag->appendInputsForUnusedColumns(plan.getCurrentDataStream().header); - join_tree_query_plan.actions_dags.push_back(array_join_action_dag.get()); - auto array_join_actions = std::make_unique(plan.getCurrentDataStream(), std::move(array_join_action_dag)); array_join_actions->setStepDescription("ARRAY JOIN actions"); + join_tree_query_plan.actions_dags.push_back(array_join_actions->getExpression().get()); plan.addStep(std::move(array_join_actions)); auto drop_unused_columns_before_array_join_actions_dag = std::make_unique(plan.getCurrentDataStream().header.getColumnsWithTypeAndName()); diff --git a/src/Planner/PlannerJoins.cpp b/src/Planner/PlannerJoins.cpp index 45842c0d705..23b6a805ab9 100644 --- a/src/Planner/PlannerJoins.cpp +++ b/src/Planner/PlannerJoins.cpp @@ -588,10 +588,10 @@ JoinClausesAndActions buildJoinClausesAndActions( } } - result.left_join_expressions_actions = left_join_actions->clone(); + result.left_join_expressions_actions = ActionsDAG::clone(left_join_actions); result.left_join_tmp_expression_actions = std::move(left_join_actions); result.left_join_expressions_actions->removeUnusedActions(join_left_actions_names); - result.right_join_expressions_actions = right_join_actions->clone(); + result.right_join_expressions_actions = ActionsDAG::clone(right_join_actions); result.right_join_tmp_expression_actions = std::move(right_join_actions); result.right_join_expressions_actions->removeUnusedActions(join_right_actions_names); diff --git a/src/Planner/PlannerWindowFunctions.cpp b/src/Planner/PlannerWindowFunctions.cpp index 9deceeef9a3..b9e11578dbc 100644 --- a/src/Planner/PlannerWindowFunctions.cpp +++ b/src/Planner/PlannerWindowFunctions.cpp @@ -157,7 +157,9 @@ std::vector sortWindowDescriptions(const std::vector }; std::vector perm(window_descriptions.size()); - std::iota(perm.begin(), perm.end(), 0U); + for (size_t i = 0; i < perm.size(); ++i) + perm[i] = i; + ::sort(perm.begin(), perm.end(), comparator); return perm; diff --git a/src/Processors/QueryPlan/ExpressionStep.cpp b/src/Processors/QueryPlan/ExpressionStep.cpp index 90ac94a1ace..50bc2e1533e 100644 --- a/src/Processors/QueryPlan/ExpressionStep.cpp +++ b/src/Processors/QueryPlan/ExpressionStep.cpp @@ -30,13 +30,13 @@ ExpressionStep::ExpressionStep(const DataStream & input_stream_, const ActionsDA input_stream_, ExpressionTransform::transformHeader(input_stream_.header, *actions_dag_), getTraits(actions_dag_, input_stream_.header, input_stream_.sort_description)) - , actions_dag(actions_dag_->clone()) + , actions_dag(ActionsDAG::clone(actions_dag_)) { } void ExpressionStep::transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings & settings) { - auto expression = std::make_shared(actions_dag->clone(), settings.getActionsSettings()); + auto expression = std::make_shared(ActionsDAG::clone(actions_dag), settings.getActionsSettings()); pipeline.addSimpleTransform([&](const Block & header) { @@ -61,13 +61,13 @@ void ExpressionStep::transformPipeline(QueryPipelineBuilder & pipeline, const Bu void 
ExpressionStep::describeActions(FormatSettings & settings) const { String prefix(settings.offset, settings.indent_char); - auto expression = std::make_shared(actions_dag->clone()); + auto expression = std::make_shared(ActionsDAG::clone(actions_dag)); expression->describeActions(settings.out, prefix); } void ExpressionStep::describeActions(JSONBuilder::JSONMap & map) const { - auto expression = std::make_shared(actions_dag->clone()); + auto expression = std::make_shared(ActionsDAG::clone(actions_dag)); map.add("Expression", expression->toTree()); } diff --git a/src/Processors/QueryPlan/FilterStep.cpp b/src/Processors/QueryPlan/FilterStep.cpp index ef9f1d17822..7883461f45a 100644 --- a/src/Processors/QueryPlan/FilterStep.cpp +++ b/src/Processors/QueryPlan/FilterStep.cpp @@ -49,7 +49,7 @@ FilterStep::FilterStep( , filter_column_name(std::move(filter_column_name_)) , remove_filter_column(remove_filter_column_) { - actions_dag = actions_dag->clone(); + actions_dag = ActionsDAG::clone(actions_dag_); actions_dag->removeAliasesForFilter(filter_column_name); } @@ -87,7 +87,7 @@ void FilterStep::describeActions(FormatSettings & settings) const settings.out << " (removed)"; settings.out << '\n'; - auto expression = std::make_shared(actions_dag->clone()); + auto expression = std::make_shared(ActionsDAG::clone(actions_dag)); expression->describeActions(settings.out, prefix); } @@ -96,7 +96,7 @@ void FilterStep::describeActions(JSONBuilder::JSONMap & map) const map.add("Filter Column", filter_column_name); map.add("Removes Filter", remove_filter_column); - auto expression = std::make_shared(actions_dag->clone()); + auto expression = std::make_shared(ActionsDAG::clone(actions_dag)); map.add("Expression", expression->toTree()); } diff --git a/src/Processors/QueryPlan/Optimizations/distinctReadInOrder.cpp b/src/Processors/QueryPlan/Optimizations/distinctReadInOrder.cpp index 87e16b5a244..6cdc3cb4eb0 100644 --- a/src/Processors/QueryPlan/Optimizations/distinctReadInOrder.cpp +++ b/src/Processors/QueryPlan/Optimizations/distinctReadInOrder.cpp @@ -15,11 +15,11 @@ static ActionsDAGPtr buildActionsForPlanPath(std::vector & d if (dag_stack.empty()) return nullptr; - ActionsDAGPtr path_actions = dag_stack.back()->clone(); + ActionsDAGPtr path_actions = ActionsDAG::clone(dag_stack.back()); dag_stack.pop_back(); while (!dag_stack.empty()) { - ActionsDAGPtr clone = dag_stack.back()->clone(); + ActionsDAGPtr clone = ActionsDAG::clone(dag_stack.back()); dag_stack.pop_back(); path_actions->mergeInplace(std::move(*clone)); } diff --git a/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp b/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp index ff1cefff09a..f26cd79dd97 100644 --- a/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp +++ b/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp @@ -597,7 +597,7 @@ size_t tryPushDownFilter(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes filter_node.step = std::make_unique( filter_node.children.front()->step->getOutputStream(), - filter->getExpression()->clone(), + ActionsDAG::clone(filter->getExpression()), filter->getFilterColumnName(), filter->removesFilterColumn()); } @@ -611,7 +611,7 @@ size_t tryPushDownFilter(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes if (auto * read_from_merge = typeid_cast(child.get())) { - FilterDAGInfo info{filter->getExpression()->clone(), filter->getFilterColumnName(), filter->removesFilterColumn()}; + FilterDAGInfo info{ActionsDAG::clone(filter->getExpression()), filter->getFilterColumnName(), 
filter->removesFilterColumn()}; read_from_merge->addFilter(std::move(info)); std::swap(*parent_node, *child_node); return 1; diff --git a/src/Processors/QueryPlan/Optimizations/liftUpUnion.cpp b/src/Processors/QueryPlan/Optimizations/liftUpUnion.cpp index 35d8b1a35e4..4629bc0af53 100644 --- a/src/Processors/QueryPlan/Optimizations/liftUpUnion.cpp +++ b/src/Processors/QueryPlan/Optimizations/liftUpUnion.cpp @@ -49,7 +49,7 @@ size_t tryLiftUpUnion(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes) expr_node.step = std::make_unique( expr_node.children.front()->step->getOutputStream(), - expression->getExpression()->clone()); + ActionsDAG::clone(expression->getExpression())); } /// - Expression - Something diff --git a/src/Processors/QueryPlan/Optimizations/optimizePrewhere.cpp b/src/Processors/QueryPlan/Optimizations/optimizePrewhere.cpp index 13b691da888..afe1406b65f 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizePrewhere.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizePrewhere.cpp @@ -108,7 +108,7 @@ void optimizePrewhere(Stack & stack, QueryPlan::Nodes &) prewhere_info->need_filter = true; prewhere_info->remove_prewhere_column = optimize_result.fully_moved_to_prewhere && filter_step->removesFilterColumn(); - auto filter_expression = filter_step->getExpression()->clone(); + auto filter_expression = ActionsDAG::clone(filter_step->getExpression()); const auto & filter_column_name = filter_step->getFilterColumnName(); if (prewhere_info->remove_prewhere_column) diff --git a/src/Processors/QueryPlan/Optimizations/optimizePrimaryKeyCondition.cpp b/src/Processors/QueryPlan/Optimizations/optimizePrimaryKeyCondition.cpp index e57d3319076..e5ded92b105 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizePrimaryKeyCondition.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizePrimaryKeyCondition.cpp @@ -18,15 +18,15 @@ void optimizePrimaryKeyCondition(const Stack & stack) const auto & storage_prewhere_info = source_step_with_filter->getPrewhereInfo(); if (storage_prewhere_info) { - source_step_with_filter->addFilter(storage_prewhere_info->prewhere_actions->clone(), storage_prewhere_info->prewhere_column_name); + source_step_with_filter->addFilter(ActionsDAG::clone(storage_prewhere_info->prewhere_actions), storage_prewhere_info->prewhere_column_name); if (storage_prewhere_info->row_level_filter) - source_step_with_filter->addFilter(storage_prewhere_info->row_level_filter->clone(), storage_prewhere_info->row_level_column_name); + source_step_with_filter->addFilter(ActionsDAG::clone(storage_prewhere_info->row_level_filter), storage_prewhere_info->row_level_column_name); } for (auto iter = stack.rbegin() + 1; iter != stack.rend(); ++iter) { if (auto * filter_step = typeid_cast(iter->node->step.get())) - source_step_with_filter->addFilter(filter_step->getExpression()->clone(), filter_step->getFilterColumnName()); + source_step_with_filter->addFilter(ActionsDAG::clone(filter_step->getExpression()), filter_step->getFilterColumnName()); /// Note: actually, plan optimizations merge Filter and Expression steps. /// Ideally, chain should look like (Expression -> ...) -> (Filter -> ...) 
-> ReadFromStorage, diff --git a/src/Processors/QueryPlan/Optimizations/optimizeReadInOrder.cpp b/src/Processors/QueryPlan/Optimizations/optimizeReadInOrder.cpp index 8e782e68db8..a8bd98d7460 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizeReadInOrder.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizeReadInOrder.cpp @@ -173,9 +173,9 @@ static void appendFixedColumnsFromFilterExpression(const ActionsDAG::Node & filt static void appendExpression(ActionsDAGPtr & dag, const ActionsDAGPtr & expression) { if (dag) - dag->mergeInplace(std::move(*expression->clone())); + dag->mergeInplace(std::move(*ActionsDAG::clone(expression))); else - dag = expression->clone(); + dag = ActionsDAG::clone(expression); } /// This function builds a common DAG which is a merge of DAGs from Filter and Expression steps chain. @@ -1066,13 +1066,13 @@ size_t tryReuseStorageOrderingForWindowFunctions(QueryPlan::Node * parent_node, for (const auto & actions_dag : window_desc.partition_by_actions) { order_by_elements_actions.emplace_back( - std::make_shared(actions_dag->clone(), ExpressionActionsSettings::fromContext(context, CompileExpressions::yes))); + std::make_shared(ActionsDAG::clone(actions_dag.get()), ExpressionActionsSettings::fromContext(context, CompileExpressions::yes))); } for (const auto & actions_dag : window_desc.order_by_actions) { order_by_elements_actions.emplace_back( - std::make_shared(actions_dag->clone(), ExpressionActionsSettings::fromContext(context, CompileExpressions::yes))); + std::make_shared(ActionsDAG::clone(actions_dag.get()), ExpressionActionsSettings::fromContext(context, CompileExpressions::yes))); } auto order_optimizer = std::make_shared( diff --git a/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp b/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp index 7c45ef48252..da057bd25c2 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp @@ -77,7 +77,7 @@ static AggregateProjectionInfo getAggregatingProjectionInfo( AggregateProjectionInfo info; info.context = interpreter.getContext(); - info.before_aggregation = analysis_result.before_aggregation->dag.clone(); + info.before_aggregation = ActionsDAG::clone(&analysis_result.before_aggregation->dag); info.keys = query_analyzer->aggregationKeys().getNames(); info.aggregates = query_analyzer->aggregates(); diff --git a/src/Processors/QueryPlan/Optimizations/projectionsCommon.cpp b/src/Processors/QueryPlan/Optimizations/projectionsCommon.cpp index d8b40b22904..0e2ad96a419 100644 --- a/src/Processors/QueryPlan/Optimizations/projectionsCommon.cpp +++ b/src/Processors/QueryPlan/Optimizations/projectionsCommon.cpp @@ -67,9 +67,9 @@ std::shared_ptr getMaxAddedBlocks(ReadFromMergeTree * rea void QueryDAG::appendExpression(const ActionsDAGPtr & expression) { if (dag) - dag->mergeInplace(std::move(*expression->clone())); + dag->mergeInplace(std::move(*ActionsDAG::clone(expression))); else - dag = expression->clone(); + dag = ActionsDAG::clone(expression); } const ActionsDAG::Node * findInOutputs(ActionsDAG & dag, const std::string & name, bool remove) @@ -238,7 +238,7 @@ bool analyzeProjectionCandidate( auto projection_query_info = query_info; projection_query_info.prewhere_info = nullptr; - projection_query_info.filter_actions_dag = dag->clone(); + projection_query_info.filter_actions_dag = ActionsDAG::clone(dag); auto projection_result_ptr = reader.estimateNumMarksToRead( 
std::move(projection_parts), diff --git a/src/Processors/QueryPlan/Optimizations/removeRedundantDistinct.cpp b/src/Processors/QueryPlan/Optimizations/removeRedundantDistinct.cpp index d3c75c988e7..81a8a537830 100644 --- a/src/Processors/QueryPlan/Optimizations/removeRedundantDistinct.cpp +++ b/src/Processors/QueryPlan/Optimizations/removeRedundantDistinct.cpp @@ -70,11 +70,11 @@ namespace if (dag_stack.empty()) return nullptr; - ActionsDAGPtr path_actions = dag_stack.back()->clone(); + ActionsDAGPtr path_actions = ActionsDAG::clone(dag_stack.back()); dag_stack.pop_back(); while (!dag_stack.empty()) { - ActionsDAGPtr clone = dag_stack.back()->clone(); + ActionsDAGPtr clone = ActionsDAG::clone(dag_stack.back()); logActionsDAG("DAG to merge", clone); dag_stack.pop_back(); path_actions->mergeInplace(std::move(*clone)); diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index e5370c1c130..d711b3e8472 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -798,7 +798,7 @@ Pipe ReadFromMergeTree::spreadMarkRangesAmongStreams(RangesInDataParts && parts_ info.use_uncompressed_cache); }; - auto sorting_expr = std::make_shared(metadata_for_reading->getSortingKey().expression->getActionsDAG().clone()); + auto sorting_expr = std::make_shared(ActionsDAG::clone(&metadata_for_reading->getSortingKey().expression->getActionsDAG())); SplitPartsWithRangesByPrimaryKeyResult split_ranges_result = splitPartsWithRangesByPrimaryKey( metadata_for_reading->getPrimaryKey(), @@ -1211,7 +1211,7 @@ Pipe ReadFromMergeTree::spreadMarkRangesAmongStreamsFinal( /// we will store lonely parts with level > 0 to use parallel select on them. RangesInDataParts non_intersecting_parts_by_primary_key; - auto sorting_expr = std::make_shared(metadata_for_reading->getSortingKey().expression->getActionsDAG().clone()); + auto sorting_expr = std::make_shared(ActionsDAG::clone(&metadata_for_reading->getSortingKey().expression->getActionsDAG())); if (prewhere_info) { @@ -1993,7 +1993,7 @@ void ReadFromMergeTree::initializePipeline(QueryPipelineBuilder & pipeline, cons if (result.sampling.use_sampling) { - auto sampling_actions = std::make_shared(result.sampling.filter_expression->clone()); + auto sampling_actions = std::make_shared(ActionsDAG::clone(result.sampling.filter_expression.get())); pipe.addSimpleTransform([&](const Block & header) { return std::make_shared( @@ -2031,7 +2031,7 @@ void ReadFromMergeTree::initializePipeline(QueryPipelineBuilder & pipeline, cons if (result_projection) { - auto projection_actions = std::make_shared(result_projection->clone()); + auto projection_actions = std::make_shared(ActionsDAG::clone(result_projection)); pipe.addSimpleTransform([&](const Block & header) { return std::make_shared(header, projection_actions); @@ -2126,7 +2126,7 @@ void ReadFromMergeTree::describeActions(FormatSettings & format_settings) const format_settings.out << " (removed)"; format_settings.out << '\n'; - auto expression = std::make_shared(prewhere_info->prewhere_actions->clone()); + auto expression = std::make_shared(ActionsDAG::clone(prewhere_info->prewhere_actions)); expression->describeActions(format_settings.out, prefix); } @@ -2135,7 +2135,7 @@ void ReadFromMergeTree::describeActions(FormatSettings & format_settings) const format_settings.out << prefix << "Row level filter" << '\n'; format_settings.out << prefix << "Row level filter column: " << prewhere_info->row_level_column_name << '\n'; - auto 
expression = std::make_shared(prewhere_info->row_level_filter->clone()); + auto expression = std::make_shared(ActionsDAG::clone(prewhere_info->row_level_filter)); expression->describeActions(format_settings.out, prefix); } } @@ -2161,7 +2161,7 @@ void ReadFromMergeTree::describeActions(JSONBuilder::JSONMap & map) const std::unique_ptr prewhere_filter_map = std::make_unique(); prewhere_filter_map->add("Prewhere filter column", prewhere_info->prewhere_column_name); prewhere_filter_map->add("Prewhere filter remove filter column", prewhere_info->remove_prewhere_column); - auto expression = std::make_shared(prewhere_info->prewhere_actions->clone()); + auto expression = std::make_shared(ActionsDAG::clone(prewhere_info->prewhere_actions)); prewhere_filter_map->add("Prewhere filter expression", expression->toTree()); prewhere_info_map->add("Prewhere filter", std::move(prewhere_filter_map)); @@ -2171,7 +2171,7 @@ void ReadFromMergeTree::describeActions(JSONBuilder::JSONMap & map) const { std::unique_ptr row_level_filter_map = std::make_unique(); row_level_filter_map->add("Row level filter column", prewhere_info->row_level_column_name); - auto expression = std::make_shared(prewhere_info->row_level_filter->clone()); + auto expression = std::make_shared(ActionsDAG::clone(prewhere_info->row_level_filter)); row_level_filter_map->add("Row level filter expression", expression->toTree()); prewhere_info_map->add("Row level filter", std::move(row_level_filter_map)); diff --git a/src/Processors/QueryPlan/SourceStepWithFilter.cpp b/src/Processors/QueryPlan/SourceStepWithFilter.cpp index b91debc8239..79b225e7f93 100644 --- a/src/Processors/QueryPlan/SourceStepWithFilter.cpp +++ b/src/Processors/QueryPlan/SourceStepWithFilter.cpp @@ -110,7 +110,7 @@ void SourceStepWithFilter::describeActions(FormatSettings & format_settings) con format_settings.out << " (removed)"; format_settings.out << '\n'; - auto expression = std::make_shared(prewhere_info->prewhere_actions->clone()); + auto expression = std::make_shared(ActionsDAG::clone(prewhere_info->prewhere_actions)); expression->describeActions(format_settings.out, prefix); } @@ -119,7 +119,7 @@ void SourceStepWithFilter::describeActions(FormatSettings & format_settings) con format_settings.out << prefix << "Row level filter" << '\n'; format_settings.out << prefix << "Row level filter column: " << prewhere_info->row_level_column_name << '\n'; - auto expression = std::make_shared(prewhere_info->row_level_filter->clone()); + auto expression = std::make_shared(ActionsDAG::clone(prewhere_info->row_level_filter)); expression->describeActions(format_settings.out, prefix); } } @@ -137,7 +137,7 @@ void SourceStepWithFilter::describeActions(JSONBuilder::JSONMap & map) const std::unique_ptr prewhere_filter_map = std::make_unique(); prewhere_filter_map->add("Prewhere filter column", prewhere_info->prewhere_column_name); prewhere_filter_map->add("Prewhere filter remove filter column", prewhere_info->remove_prewhere_column); - auto expression = std::make_shared(prewhere_info->prewhere_actions->clone()); + auto expression = std::make_shared(ActionsDAG::clone(prewhere_info->prewhere_actions)); prewhere_filter_map->add("Prewhere filter expression", expression->toTree()); prewhere_info_map->add("Prewhere filter", std::move(prewhere_filter_map)); @@ -147,7 +147,7 @@ void SourceStepWithFilter::describeActions(JSONBuilder::JSONMap & map) const { std::unique_ptr row_level_filter_map = std::make_unique(); row_level_filter_map->add("Row level filter column", 
prewhere_info->row_level_column_name); - auto expression = std::make_shared(prewhere_info->row_level_filter->clone()); + auto expression = std::make_shared(ActionsDAG::clone(prewhere_info->row_level_filter)); row_level_filter_map->add("Row level filter expression", expression->toTree()); prewhere_info_map->add("Row level filter", std::move(row_level_filter_map)); diff --git a/src/Processors/QueryPlan/TotalsHavingStep.cpp b/src/Processors/QueryPlan/TotalsHavingStep.cpp index 45de6c31d24..19632b1862f 100644 --- a/src/Processors/QueryPlan/TotalsHavingStep.cpp +++ b/src/Processors/QueryPlan/TotalsHavingStep.cpp @@ -46,7 +46,7 @@ TotalsHavingStep::TotalsHavingStep( getTraits(!filter_column_.empty())) , aggregates(aggregates_) , overflow_row(overflow_row_) - , actions_dag(actions_dag_->clone()) + , actions_dag(ActionsDAG::clone(actions_dag_)) , filter_column_name(filter_column_) , remove_filter(remove_filter_) , totals_mode(totals_mode_) @@ -57,7 +57,7 @@ TotalsHavingStep::TotalsHavingStep( void TotalsHavingStep::transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings & settings) { - auto expression_actions = actions_dag ? std::make_shared(actions_dag->clone(), settings.getActionsSettings()) : nullptr; + auto expression_actions = actions_dag ? std::make_shared(ActionsDAG::clone(actions_dag), settings.getActionsSettings()) : nullptr; auto totals_having = std::make_shared( pipeline.getHeader(), @@ -100,7 +100,7 @@ void TotalsHavingStep::describeActions(FormatSettings & settings) const if (actions_dag) { bool first = true; - auto expression = std::make_shared(actions_dag->clone()); + auto expression = std::make_shared(ActionsDAG::clone(actions_dag)); for (const auto & action : expression->getActions()) { settings.out << prefix << (first ? 
"Actions: " @@ -117,7 +117,7 @@ void TotalsHavingStep::describeActions(JSONBuilder::JSONMap & map) const if (actions_dag) { map.add("Filter column", filter_column_name); - auto expression = std::make_shared(actions_dag->clone()); + auto expression = std::make_shared(ActionsDAG::clone(actions_dag)); map.add("Expression", expression->toTree()); } } diff --git a/src/Processors/Transforms/FillingTransform.cpp b/src/Processors/Transforms/FillingTransform.cpp index 95267bc24e0..bbe57fc6441 100644 --- a/src/Processors/Transforms/FillingTransform.cpp +++ b/src/Processors/Transforms/FillingTransform.cpp @@ -203,7 +203,7 @@ FillingTransform::FillingTransform( , use_with_fill_by_sorting_prefix(use_with_fill_by_sorting_prefix_) { if (interpolate_description) - interpolate_actions = std::make_shared(interpolate_description->actions->clone()); + interpolate_actions = std::make_shared(ActionsDAG::clone(interpolate_description->actions)); std::vector is_fill_column(header_.columns()); for (size_t i = 0, size = fill_description.size(); i < size; ++i) diff --git a/src/Storages/MergeTree/MergeTreeIndexSet.cpp b/src/Storages/MergeTree/MergeTreeIndexSet.cpp index 7c65381b05b..8d4ef69b1b9 100644 --- a/src/Storages/MergeTree/MergeTreeIndexSet.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexSet.cpp @@ -265,7 +265,7 @@ MergeTreeIndexConditionSet::MergeTreeIndexConditionSet( if (!set->buildOrderedSetInplace(context)) return; - auto filter_actions_dag = filter_dag->clone(); + auto filter_actions_dag = ActionsDAG::clone(filter_dag); const auto * filter_actions_dag_node = filter_actions_dag->getOutputs().at(0); std::unordered_map node_to_result_node; @@ -319,7 +319,7 @@ static const ActionsDAG::NodeRawConstPtrs & getArguments(const ActionsDAG::Node return index_hint.getActions()->getOutputs(); /// Import the DAG and map argument pointers. 
- ActionsDAGPtr actions_clone = index_hint.getActions()->clone(); + ActionsDAGPtr actions_clone = ActionsDAG::clone(index_hint.getActions()); chassert(storage); result_dag_or_null->mergeNodes(std::move(*actions_clone), storage); return *storage; diff --git a/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp b/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp index 8fa5b2cc955..e924f853524 100644 --- a/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp +++ b/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp @@ -80,7 +80,7 @@ PrewhereExprInfo MergeTreeSelectProcessor::getPrewhereActions(PrewhereInfoPtr pr PrewhereExprStep row_level_filter_step { .type = PrewhereExprStep::Filter, - .actions = std::make_shared(prewhere_info->row_level_filter->clone(), actions_settings), + .actions = std::make_shared(ActionsDAG::clone(prewhere_info->row_level_filter), actions_settings), .filter_column_name = prewhere_info->row_level_column_name, .remove_filter_column = true, .need_filter = true, @@ -96,7 +96,7 @@ PrewhereExprInfo MergeTreeSelectProcessor::getPrewhereActions(PrewhereInfoPtr pr PrewhereExprStep prewhere_step { .type = PrewhereExprStep::Filter, - .actions = std::make_shared(prewhere_info->prewhere_actions->clone(), actions_settings), + .actions = std::make_shared(ActionsDAG::clone(prewhere_info->prewhere_actions), actions_settings), .filter_column_name = prewhere_info->prewhere_column_name, .remove_filter_column = prewhere_info->remove_prewhere_column, .need_filter = prewhere_info->need_filter, diff --git a/src/Storages/SelectQueryInfo.h b/src/Storages/SelectQueryInfo.h index 654b8b788fe..0b7035504ae 100644 --- a/src/Storages/SelectQueryInfo.h +++ b/src/Storages/SelectQueryInfo.h @@ -66,10 +66,10 @@ struct PrewhereInfo PrewhereInfoPtr prewhere_info = std::make_shared(); if (row_level_filter) - prewhere_info->row_level_filter = row_level_filter->clone(); + prewhere_info->row_level_filter = ActionsDAG::clone(row_level_filter); if (prewhere_actions) - prewhere_info->prewhere_actions = prewhere_actions->clone(); + prewhere_info->prewhere_actions = ActionsDAG::clone(prewhere_actions); prewhere_info->row_level_column_name = row_level_column_name; prewhere_info->prewhere_column_name = prewhere_column_name; diff --git a/src/Storages/StorageBuffer.cpp b/src/Storages/StorageBuffer.cpp index 9bddf4f0230..695b31d0c80 100644 --- a/src/Storages/StorageBuffer.cpp +++ b/src/Storages/StorageBuffer.cpp @@ -312,7 +312,7 @@ void StorageBuffer::read( if (src_table_query_info.prewhere_info->row_level_filter) { src_table_query_info.prewhere_info->row_level_filter = ActionsDAG::merge( - std::move(*actions_dag->clone()), + std::move(*ActionsDAG::clone(actions_dag)), std::move(*src_table_query_info.prewhere_info->row_level_filter)); src_table_query_info.prewhere_info->row_level_filter->removeUnusedActions(); @@ -321,7 +321,7 @@ void StorageBuffer::read( if (src_table_query_info.prewhere_info->prewhere_actions) { src_table_query_info.prewhere_info->prewhere_actions = ActionsDAG::merge( - std::move(*actions_dag->clone()), + std::move(*ActionsDAG::clone(actions_dag)), std::move(*src_table_query_info.prewhere_info->prewhere_actions)); src_table_query_info.prewhere_info->prewhere_actions->removeUnusedActions(); @@ -432,7 +432,7 @@ void StorageBuffer::read( { return std::make_shared( header, - std::make_shared(query_info.prewhere_info->row_level_filter->clone(), actions_settings), + std::make_shared(ActionsDAG::clone(query_info.prewhere_info->row_level_filter), actions_settings), 
query_info.prewhere_info->row_level_column_name, false); }); @@ -442,7 +442,7 @@ void StorageBuffer::read( { return std::make_shared( header, - std::make_shared(query_info.prewhere_info->prewhere_actions->clone(), actions_settings), + std::make_shared(ActionsDAG::clone(query_info.prewhere_info->prewhere_actions), actions_settings), query_info.prewhere_info->prewhere_column_name, query_info.prewhere_info->remove_prewhere_column); }); diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index c42e3058347..d21a6dc20dd 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -662,7 +662,7 @@ std::vector ReadFromMerge::createChildrenPlans(SelectQ { auto filter_step = std::make_unique( child.plan.getCurrentDataStream(), - filter_info.actions->clone(), + ActionsDAG::clone(filter_info.actions), filter_info.column_name, filter_info.do_remove_column); @@ -1273,12 +1273,12 @@ void ReadFromMerge::RowPolicyData::extendNames(Names & names) const void ReadFromMerge::RowPolicyData::addStorageFilter(SourceStepWithFilter * step) const { - step->addFilter(actions_dag->clone(), filter_column_name); + step->addFilter(ActionsDAG::clone(actions_dag), filter_column_name); } void ReadFromMerge::RowPolicyData::addFilterTransform(QueryPlan & plan) const { - auto filter_step = std::make_unique(plan.getCurrentDataStream(), actions_dag->clone(), filter_column_name, true /* remove filter column */); + auto filter_step = std::make_unique(plan.getCurrentDataStream(), ActionsDAG::clone(actions_dag), filter_column_name, true /* remove filter column */); plan.addStep(std::move(filter_step)); } diff --git a/src/Storages/VirtualColumnUtils.cpp b/src/Storages/VirtualColumnUtils.cpp index 6f7d1d4c39f..1bd5e80a4f9 100644 --- a/src/Storages/VirtualColumnUtils.cpp +++ b/src/Storages/VirtualColumnUtils.cpp @@ -80,7 +80,7 @@ void buildSetsForDAG(const ActionsDAGPtr & dag, const ContextPtr & context) void filterBlockWithDAG(const ActionsDAGPtr & dag, Block & block, ContextPtr context) { buildSetsForDAG(dag, context); - auto actions = std::make_shared(dag->clone()); + auto actions = std::make_shared(ActionsDAG::clone(dag)); Block block_with_filter = block; actions->execute(block_with_filter, /*dry_run=*/ false, /*allow_duplicates_in_input=*/ true); @@ -318,7 +318,7 @@ static const ActionsDAG::Node * splitFilterNodeForAllowedInputs( { if (const auto * index_hint = typeid_cast(adaptor->getFunction().get())) { - auto index_hint_dag = index_hint->getActions()->clone(); + auto index_hint_dag = ActionsDAG::clone(index_hint->getActions()); ActionsDAG::NodeRawConstPtrs atoms; for (const auto & output : index_hint_dag->getOutputs()) if (const auto * child_copy = splitFilterNodeForAllowedInputs(output, allowed_inputs, additional_nodes)) From 068e1c55545dfc4e004b8b5970b3f237033444cd Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 25 Jun 2024 15:59:00 +0000 Subject: [PATCH 015/661] Fixing build. 
--- src/Processors/QueryPlan/Optimizations/mergeExpressions.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Processors/QueryPlan/Optimizations/mergeExpressions.cpp b/src/Processors/QueryPlan/Optimizations/mergeExpressions.cpp index 6ace1b3b5ce..97de69b1134 100644 --- a/src/Processors/QueryPlan/Optimizations/mergeExpressions.cpp +++ b/src/Processors/QueryPlan/Optimizations/mergeExpressions.cpp @@ -84,12 +84,12 @@ size_t tryMergeExpressions(QueryPlan::Node * parent_node, QueryPlan::Nodes &) if (child_actions->hasArrayJoin()) return 0; - auto actions = child_actions->clone(); + auto actions = ActionsDAG::clone(child_actions); const auto & child_filter_node = actions->findInOutputs(child_filter->getFilterColumnName()); if (child_filter->removesFilterColumn()) removeFromOutputs(*actions, child_filter_node); - actions->mergeInplace(std::move(*parent_actions->clone())); + actions->mergeInplace(std::move(*ActionsDAG::clone(parent_actions))); const auto & parent_filter_node = actions->findInOutputs(parent_filter->getFilterColumnName()); if (parent_filter->removesFilterColumn()) From 3149e51e9254b268c41ade796d3652d0c2dec8f7 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 25 Jun 2024 17:36:07 +0000 Subject: [PATCH 016/661] Fix other crashes. --- src/Storages/WindowView/StorageWindowView.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Storages/WindowView/StorageWindowView.cpp b/src/Storages/WindowView/StorageWindowView.cpp index 5822e46f9f8..8f39f0da5af 100644 --- a/src/Storages/WindowView/StorageWindowView.cpp +++ b/src/Storages/WindowView/StorageWindowView.cpp @@ -563,11 +563,11 @@ std::pair StorageWindowView::getNewBlocks(UInt32 watermark) auto syntax_result = TreeRewriter(getContext()).analyze(filter_function, builder.getHeader().getNamesAndTypesList()); auto filter_expression = ExpressionAnalyzer(filter_function, syntax_result, getContext()).getActionsDAG(false); + auto filter_actions = std::make_shared(std::move(filter_expression)); builder.addSimpleTransform([&](const Block & header) { - return std::make_shared( - header, std::make_shared(std::move(filter_expression)), filter_function->getColumnName(), true); + return std::make_shared(header, filter_actions, filter_function->getColumnName(), true); }); /// Adding window column From c998ec1e4f1b91f8ca20c2bd5a7acb6ac8d2e1b1 Mon Sep 17 00:00:00 2001 From: jsc0218 Date: Sat, 29 Jun 2024 02:40:22 +0000 Subject: [PATCH 017/661] add test and better naming --- src/Access/Common/AccessRightsElement.cpp | 4 +- src/Access/Common/AccessRightsElement.h | 2 +- src/Access/ContextAccess.cpp | 10 +-- ...xml => config_with_table_engine_grant.xml} | 0 .../config_without_table_engine_grant.xml | 5 ++ ...est.py => test_with_table_engine_grant.py} | 2 +- .../test_without_table_engine_grant.py | 81 +++++++++++++++++++ 7 files changed, 95 insertions(+), 9 deletions(-) rename tests/integration/test_grant_and_revoke/configs/{config.xml => config_with_table_engine_grant.xml} (100%) create mode 100644 tests/integration/test_grant_and_revoke/configs/config_without_table_engine_grant.xml rename tests/integration/test_grant_and_revoke/{test.py => test_with_table_engine_grant.py} (99%) create mode 100644 tests/integration/test_grant_and_revoke/test_without_table_engine_grant.py diff --git a/src/Access/Common/AccessRightsElement.cpp b/src/Access/Common/AccessRightsElement.cpp index 2ee13d6b94f..63bda09a51b 100644 --- a/src/Access/Common/AccessRightsElement.cpp +++ b/src/Access/Common/AccessRightsElement.cpp 
@@ -224,10 +224,10 @@ void AccessRightsElement::replaceEmptyDatabase(const String & current_database) String AccessRightsElement::toString() const { return toStringImpl(*this, true); } String AccessRightsElement::toStringWithoutOptions() const { return toStringImpl(*this, false); } -String AccessRightsElement::toStringWithoutONClause() const +String AccessRightsElement::toStringForAccessTypeSource() const { String result{access_flags.toKeywords().front()}; - return result + " ON {db.table}"; + return result + " ON *.*"; } bool AccessRightsElements::empty() const { return std::all_of(begin(), end(), [](const AccessRightsElement & e) { return e.empty(); }); } diff --git a/src/Access/Common/AccessRightsElement.h b/src/Access/Common/AccessRightsElement.h index 49764fc727f..78e94e6f2e4 100644 --- a/src/Access/Common/AccessRightsElement.h +++ b/src/Access/Common/AccessRightsElement.h @@ -89,7 +89,7 @@ struct AccessRightsElement /// Returns a human-readable representation like "GRANT SELECT, UPDATE(x, y) ON db.table". String toString() const; String toStringWithoutOptions() const; - String toStringWithoutONClause() const; + String toStringForAccessTypeSource() const; }; diff --git a/src/Access/ContextAccess.cpp b/src/Access/ContextAccess.cpp index 4620561053b..8ff1fc8ed21 100644 --- a/src/Access/ContextAccess.cpp +++ b/src/Access/ContextAccess.cpp @@ -622,7 +622,7 @@ bool ContextAccess::checkAccessImplHelper(AccessFlags flags, const Args &... arg /// since SOURCES is not granted actually. In order to solve this, turn the prompt logic back to Sources. if (flags & AccessType::TABLE_ENGINE && !access_control->doesTableEnginesRequireGrant()) { - AccessFlags newFlags; + AccessFlags new_flags; String table_engine_name{getTableEngine(args...)}; for (const auto & source_and_table_engine : source_and_table_engines) @@ -631,11 +631,11 @@ bool ContextAccess::checkAccessImplHelper(AccessFlags flags, const Args &... arg if (table_engine != table_engine_name) continue; const auto & source = std::get<0>(source_and_table_engine); /// Set the flags from Table Engine to SOURCES so that prompts can be meaningful. - newFlags = source; + new_flags = source; break; } - if (newFlags.isEmpty()) + if (new_flags.isEmpty()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Didn't find the target Source from the Table Engine"); if (grant_option && acs->isGranted(flags, args...)) @@ -644,12 +644,12 @@ bool ContextAccess::checkAccessImplHelper(AccessFlags flags, const Args &... arg "{}: Not enough privileges. " "The required privileges have been granted, but without grant option. " "To execute this query, it's necessary to have the grant {} WITH GRANT OPTION", - AccessRightsElement{newFlags}.toStringWithoutONClause()); + AccessRightsElement{new_flags}.toStringForAccessTypeSource()); } return access_denied(ErrorCodes::ACCESS_DENIED, "{}: Not enough privileges. To execute this query, it's necessary to have the grant {}", - AccessRightsElement{newFlags}.toStringWithoutONClause() + (grant_option ? " WITH GRANT OPTION" : "")); + AccessRightsElement{new_flags}.toStringForAccessTypeSource() + (grant_option ? 
" WITH GRANT OPTION" : "")); } if (grant_option && acs->isGranted(flags, args...)) diff --git a/tests/integration/test_grant_and_revoke/configs/config.xml b/tests/integration/test_grant_and_revoke/configs/config_with_table_engine_grant.xml similarity index 100% rename from tests/integration/test_grant_and_revoke/configs/config.xml rename to tests/integration/test_grant_and_revoke/configs/config_with_table_engine_grant.xml diff --git a/tests/integration/test_grant_and_revoke/configs/config_without_table_engine_grant.xml b/tests/integration/test_grant_and_revoke/configs/config_without_table_engine_grant.xml new file mode 100644 index 00000000000..d3571f281f5 --- /dev/null +++ b/tests/integration/test_grant_and_revoke/configs/config_without_table_engine_grant.xml @@ -0,0 +1,5 @@ + + + false + + diff --git a/tests/integration/test_grant_and_revoke/test.py b/tests/integration/test_grant_and_revoke/test_with_table_engine_grant.py similarity index 99% rename from tests/integration/test_grant_and_revoke/test.py rename to tests/integration/test_grant_and_revoke/test_with_table_engine_grant.py index e533cced1e4..25ca7913e4e 100644 --- a/tests/integration/test_grant_and_revoke/test.py +++ b/tests/integration/test_grant_and_revoke/test_with_table_engine_grant.py @@ -5,7 +5,7 @@ from helpers.test_tools import TSV cluster = ClickHouseCluster(__file__) instance = cluster.add_instance( "instance", - main_configs=["configs/config.xml"], + main_configs=["configs/config_with_table_engine_grant.xml"], user_configs=["configs/users.d/users.xml"], ) diff --git a/tests/integration/test_grant_and_revoke/test_without_table_engine_grant.py b/tests/integration/test_grant_and_revoke/test_without_table_engine_grant.py new file mode 100644 index 00000000000..210bb8ec465 --- /dev/null +++ b/tests/integration/test_grant_and_revoke/test_without_table_engine_grant.py @@ -0,0 +1,81 @@ +import pytest +from helpers.cluster import ClickHouseCluster +from helpers.test_tools import TSV + +cluster = ClickHouseCluster(__file__) +instance = cluster.add_instance( + "instance", + main_configs=["configs/config_without_table_engine_grant.xml"], + user_configs=["configs/users.d/users.xml"], +) + + +@pytest.fixture(scope="module", autouse=True) +def start_cluster(): + try: + cluster.start() + + instance.query("CREATE DATABASE test") + + yield cluster + + finally: + cluster.shutdown() + + +@pytest.fixture(autouse=True) +def cleanup_after_test(): + try: + yield + finally: + instance.query("DROP USER IF EXISTS A") + instance.query("DROP TABLE IF EXISTS test.table1") + + +def test_table_engine_and_source_grant(): + instance.query("DROP USER IF EXISTS A") + instance.query("CREATE USER A") + instance.query("GRANT CREATE TABLE ON test.table1 TO A") + + instance.query("GRANT POSTGRES ON *.* TO A") + + instance.query( + """ + CREATE TABLE test.table1(a Integer) + engine=PostgreSQL('localhost:5432', 'dummy', 'dummy', 'dummy', 'dummy'); + """, + user="A", + ) + + instance.query("DROP TABLE test.table1") + + instance.query("REVOKE POSTGRES ON *.* FROM A") + + assert "Not enough privileges" in instance.query_and_get_error( + """ + CREATE TABLE test.table1(a Integer) + engine=PostgreSQL('localhost:5432', 'dummy', 'dummy', 'dummy', 'dummy'); + """, + user="A", + ) + + # expecting grant POSTGRES instead of grant PostgreSQL due to discrepancy between source access type and table engine + assert "grant POSTGRES ON *.*" in instance.query_and_get_error( + """ + CREATE TABLE test.table1(a Integer) + engine=PostgreSQL('localhost:5432', 'dummy', 'dummy', 
'dummy', 'dummy'); + """, + user="A", + ) + + instance.query("GRANT SOURCES ON *.* TO A") + + instance.query( + """ + CREATE TABLE test.table1(a Integer) + engine=PostgreSQL('localhost:5432', 'dummy', 'dummy', 'dummy', 'dummy'); + """, + user="A", + ) + + instance.query("DROP TABLE test.table1") From 9be404c9c7b1e14410928a4aef8396664d1e364e Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 2 Jul 2024 18:02:57 +0000 Subject: [PATCH 018/661] Fix another case. --- src/Planner/PlannerJoinTree.cpp | 2 +- src/Storages/StorageMerge.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Planner/PlannerJoinTree.cpp b/src/Planner/PlannerJoinTree.cpp index 16b5e363bfd..604d3366484 100644 --- a/src/Planner/PlannerJoinTree.cpp +++ b/src/Planner/PlannerJoinTree.cpp @@ -1383,11 +1383,11 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_ if (join_clauses_and_actions.mixed_join_expressions_actions) { - left_join_tree_query_plan.actions_dags.push_back(join_clauses_and_actions.mixed_join_expressions_actions.get()); ExpressionActionsPtr & mixed_join_expression = table_join->getMixedJoinExpression(); mixed_join_expression = std::make_shared( std::move(join_clauses_and_actions.mixed_join_expressions_actions), ExpressionActionsSettings::fromContext(planner_context->getQueryContext())); + left_join_tree_query_plan.actions_dags.push_back(&mixed_join_expression->getActionsDAG()); } } else if (join_node.isUsingJoinExpression()) diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index 5c5ce4ecc2e..c3fdad3a8f2 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -1240,7 +1240,7 @@ ReadFromMerge::RowPolicyData::RowPolicyData(RowPolicyFilterPtr row_policy_filter auto expression_analyzer = ExpressionAnalyzer{expr, syntax_result, local_context}; actions_dag = expression_analyzer.getActionsDAG(false /* add_aliases */, false /* project_result */); - filter_actions = std::make_shared(std::move(actions_dag), + filter_actions = std::make_shared(ActionsDAG::clone(actions_dag), ExpressionActionsSettings::fromContext(local_context, CompileExpressions::yes)); const auto & required_columns = filter_actions->getRequiredColumnsWithTypes(); const auto & sample_block_columns = filter_actions->getSampleBlock().getNamesAndTypesList(); From 57818990f201562d0b6938c1b8de78d16bac471f Mon Sep 17 00:00:00 2001 From: jsc0218 Date: Tue, 2 Jul 2024 20:41:08 +0000 Subject: [PATCH 019/661] fix the test --- src/Access/ContextAccess.cpp | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/src/Access/ContextAccess.cpp b/src/Access/ContextAccess.cpp index 8ff1fc8ed21..f534c334318 100644 --- a/src/Access/ContextAccess.cpp +++ b/src/Access/ContextAccess.cpp @@ -618,6 +618,22 @@ bool ContextAccess::checkAccessImplHelper(AccessFlags flags, const Args &... arg if (!granted) { + auto access_denied_no_grant = [&](AccessFlags access_flags, FmtArgs && ...fmt_args) + { + if (grant_option && acs->isGranted(access_flags, fmt_args...)) + { + return access_denied(ErrorCodes::ACCESS_DENIED, + "{}: Not enough privileges. " + "The required privileges have been granted, but without grant option. " + "To execute this query, it's necessary to have the grant {} WITH GRANT OPTION", + AccessRightsElement{access_flags, fmt_args...}.toStringWithoutOptions()); + } + + return access_denied(ErrorCodes::ACCESS_DENIED, + "{}: Not enough privileges. 
To execute this query, it's necessary to have the grant {}", + AccessRightsElement{access_flags, fmt_args...}.toStringWithoutOptions() + (grant_option ? " WITH GRANT OPTION" : "")); + }; + /// As we check the SOURCES from the Table Engine logic, direct prompt about Table Engine would be misleading /// since SOURCES is not granted actually. In order to solve this, turn the prompt logic back to Sources. if (flags & AccessType::TABLE_ENGINE && !access_control->doesTableEnginesRequireGrant()) @@ -635,8 +651,9 @@ bool ContextAccess::checkAccessImplHelper(AccessFlags flags, const Args &... arg break; } + /// Might happen in the case of grant Table Engine on A (but not source), then revoke A. if (new_flags.isEmpty()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Didn't find the target Source from the Table Engine"); + return access_denied_no_grant(flags, args...); if (grant_option && acs->isGranted(flags, args...)) { @@ -652,18 +669,7 @@ bool ContextAccess::checkAccessImplHelper(AccessFlags flags, const Args &... arg AccessRightsElement{new_flags}.toStringForAccessTypeSource() + (grant_option ? " WITH GRANT OPTION" : "")); } - if (grant_option && acs->isGranted(flags, args...)) - { - return access_denied(ErrorCodes::ACCESS_DENIED, - "{}: Not enough privileges. " - "The required privileges have been granted, but without grant option. " - "To execute this query, it's necessary to have the grant {} WITH GRANT OPTION", - AccessRightsElement{flags, args...}.toStringWithoutOptions()); - } - - return access_denied(ErrorCodes::ACCESS_DENIED, - "{}: Not enough privileges. To execute this query, it's necessary to have the grant {}", - AccessRightsElement{flags, args...}.toStringWithoutOptions() + (grant_option ? " WITH GRANT OPTION" : "")); + return access_denied_no_grant(flags, args...); } struct PrecalculatedFlags From 6a06024983a78aaab9b7cbe6e9533255debebdb3 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Wed, 3 Jul 2024 10:25:40 +0000 Subject: [PATCH 020/661] Fix for prewhere optimization. --- src/Processors/QueryPlan/Optimizations/optimizePrewhere.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Processors/QueryPlan/Optimizations/optimizePrewhere.cpp b/src/Processors/QueryPlan/Optimizations/optimizePrewhere.cpp index afe1406b65f..f203d831750 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizePrewhere.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizePrewhere.cpp @@ -59,7 +59,7 @@ void optimizePrewhere(Stack & stack, QueryPlan::Nodes &) /// TODO: We can also check for UnionStep, such as StorageBuffer and local distributed plans. 
QueryPlan::Node * filter_node = (stack.rbegin() + 1)->node; - const auto * filter_step = typeid_cast(filter_node->step.get()); + auto * filter_step = typeid_cast(filter_node->step.get()); if (!filter_step) return; @@ -108,7 +108,7 @@ void optimizePrewhere(Stack & stack, QueryPlan::Nodes &) prewhere_info->need_filter = true; prewhere_info->remove_prewhere_column = optimize_result.fully_moved_to_prewhere && filter_step->removesFilterColumn(); - auto filter_expression = ActionsDAG::clone(filter_step->getExpression()); + auto filter_expression = std::move(filter_step->getExpression()); const auto & filter_column_name = filter_step->getFilterColumnName(); if (prewhere_info->remove_prewhere_column) @@ -121,7 +121,7 @@ void optimizePrewhere(Stack & stack, QueryPlan::Nodes &) outputs.resize(size); } - auto split_result = filter_step->getExpression()->split(optimize_result.prewhere_nodes, true, true); + auto split_result = filter_expression->split(optimize_result.prewhere_nodes, true, true); /// This is the leak of abstraction. /// Splited actions may have inputs which are needed only for PREWHERE. From 41c62ca6636572b4d7654dc0dc329740bb5c7425 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Wed, 3 Jul 2024 14:44:47 +0000 Subject: [PATCH 021/661] better --- src/Planner/Planner.cpp | 91 +++++++++++---------------------- src/Planner/PlannerJoinTree.cpp | 25 ++++----- src/Planner/PlannerJoinTree.h | 4 +- src/Planner/Utils.cpp | 30 +++++++++++ src/Planner/Utils.h | 3 ++ 5 files changed, 75 insertions(+), 78 deletions(-) diff --git a/src/Planner/Planner.cpp b/src/Planner/Planner.cpp index dddb7531519..16ee6de73c4 100644 --- a/src/Planner/Planner.cpp +++ b/src/Planner/Planner.cpp @@ -10,7 +10,6 @@ #include #include -#include #include #include @@ -331,14 +330,14 @@ public: void addExpressionStep(QueryPlan & query_plan, const ActionsAndProjectInputsFlagPtr & expression_actions, const std::string & step_description, - std::vector & result_actions_to_execute) + UsefulSets & useful_sets) { auto actions = ActionsDAG::clone(&expression_actions->dag); if (expression_actions->project_input) actions->appendInputsForUnusedColumns(query_plan.getCurrentDataStream().header); auto expression_step = std::make_unique(query_plan.getCurrentDataStream(), actions); - result_actions_to_execute.push_back(expression_step->getExpression().get()); + appendSetsFromActionsDAG(*expression_step->getExpression(), useful_sets); expression_step->setStepDescription(step_description); query_plan.addStep(std::move(expression_step)); } @@ -346,7 +345,7 @@ void addExpressionStep(QueryPlan & query_plan, void addFilterStep(QueryPlan & query_plan, const FilterAnalysisResult & filter_analysis_result, const std::string & step_description, - std::vector & result_actions_to_execute) + UsefulSets & useful_sets) { auto actions = ActionsDAG::clone(&filter_analysis_result.filter_actions->dag); if (filter_analysis_result.filter_actions->project_input) @@ -356,7 +355,7 @@ void addFilterStep(QueryPlan & query_plan, actions, filter_analysis_result.filter_column_name, filter_analysis_result.remove_filter_column); - result_actions_to_execute.push_back(where_step->getExpression().get()); + appendSetsFromActionsDAG(*where_step->getExpression(), useful_sets); where_step->setStepDescription(step_description); query_plan.addStep(std::move(where_step)); } @@ -544,7 +543,7 @@ void addTotalsHavingStep(QueryPlan & query_plan, const QueryAnalysisResult & query_analysis_result, const PlannerContextPtr & planner_context, const QueryNode & query_node, - std::vector 
& result_actions_to_execute) + UsefulSets & useful_sets) { const auto & query_context = planner_context->getQueryContext(); const auto & settings = query_context->getSettingsRef(); @@ -573,7 +572,7 @@ void addTotalsHavingStep(QueryPlan & query_plan, need_finalize); if (having_analysis_result.filter_actions) - result_actions_to_execute.push_back(totals_having_step->getActions().get()); + appendSetsFromActionsDAG(*totals_having_step->getActions(), useful_sets); query_plan.addStep(std::move(totals_having_step)); } @@ -887,7 +886,7 @@ void addPreliminarySortOrDistinctOrLimitStepsIfNeeded(QueryPlan & query_plan, const PlannerContextPtr & planner_context, const PlannerQueryProcessingInfo & query_processing_info, const QueryTreeNodePtr & query_tree, - std::vector & result_actions_to_execute) + UsefulSets & useful_sets) { const auto & query_node = query_tree->as(); @@ -919,7 +918,7 @@ void addPreliminarySortOrDistinctOrLimitStepsIfNeeded(QueryPlan & query_plan, if (expressions_analysis_result.hasLimitBy()) { const auto & limit_by_analysis_result = expressions_analysis_result.getLimitBy(); - addExpressionStep(query_plan, limit_by_analysis_result.before_limit_by_actions, "Before LIMIT BY", result_actions_to_execute); + addExpressionStep(query_plan, limit_by_analysis_result.before_limit_by_actions, "Before LIMIT BY", useful_sets); addLimitByStep(query_plan, limit_by_analysis_result, query_node); } @@ -1057,47 +1056,15 @@ void addOffsetStep(QueryPlan & query_plan, const QueryAnalysisResult & query_ana } } -void collectSetsFromActionsDAG(const ActionsDAG & dag, std::unordered_set & useful_sets) -{ - for (const auto & node : dag.getNodes()) - { - if (node.column) - { - const IColumn * column = node.column.get(); - if (const auto * column_const = typeid_cast(column)) - column = &column_const->getDataColumn(); - - if (const auto * column_set = typeid_cast(column)) - useful_sets.insert(column_set->getData().get()); - } - - if (node.type == ActionsDAG::ActionType::FUNCTION && node.function_base->getName() == "indexHint") - { - ActionsDAG::NodeRawConstPtrs children; - if (const auto * adaptor = typeid_cast(node.function_base.get())) - { - if (const auto * index_hint = typeid_cast(adaptor->getFunction().get())) - { - collectSetsFromActionsDAG(*index_hint->getActions(), useful_sets); - } - } - } - } -} - void addBuildSubqueriesForSetsStepIfNeeded( QueryPlan & query_plan, const SelectQueryOptions & select_query_options, const PlannerContextPtr & planner_context, - const std::vector & result_actions_to_execute) + const UsefulSets & useful_sets) { auto subqueries = planner_context->getPreparedSets().getSubqueries(); - std::unordered_set useful_sets; - for (const auto * actions_to_execute : result_actions_to_execute) - collectSetsFromActionsDAG(*actions_to_execute, useful_sets); - - auto predicate = [&useful_sets](const auto & set) { return !useful_sets.contains(set.get()); }; + auto predicate = [&useful_sets](const auto & set) { return !useful_sets.contains(set); }; auto it = std::remove_if(subqueries.begin(), subqueries.end(), std::move(predicate)); subqueries.erase(it, subqueries.end()); @@ -1542,15 +1509,15 @@ void Planner::buildPlanForQueryNode() planner_context, query_processing_info); - std::vector result_actions_to_execute = std::move(join_tree_query_plan.actions_dags); + auto useful_sets = std::move(join_tree_query_plan.useful_sets); for (auto & [_, table_expression_data] : planner_context->getTableExpressionNodeToData()) { if (table_expression_data.getPrewhereFilterActions()) - 
result_actions_to_execute.push_back(table_expression_data.getPrewhereFilterActions().get()); + appendSetsFromActionsDAG(*table_expression_data.getPrewhereFilterActions(), useful_sets); if (table_expression_data.getRowLevelFilterActions()) - result_actions_to_execute.push_back(table_expression_data.getRowLevelFilterActions().get()); + appendSetsFromActionsDAG(*table_expression_data.getRowLevelFilterActions(), useful_sets); } if (query_processing_info.isIntermediateStage()) @@ -1561,7 +1528,7 @@ void Planner::buildPlanForQueryNode() planner_context, query_processing_info, query_tree, - result_actions_to_execute); + useful_sets); if (expression_analysis_result.hasAggregation()) { @@ -1573,13 +1540,13 @@ void Planner::buildPlanForQueryNode() if (query_processing_info.isFirstStage()) { if (expression_analysis_result.hasWhere()) - addFilterStep(query_plan, expression_analysis_result.getWhere(), "WHERE", result_actions_to_execute); + addFilterStep(query_plan, expression_analysis_result.getWhere(), "WHERE", useful_sets); if (expression_analysis_result.hasAggregation()) { const auto & aggregation_analysis_result = expression_analysis_result.getAggregation(); if (aggregation_analysis_result.before_aggregation_actions) - addExpressionStep(query_plan, aggregation_analysis_result.before_aggregation_actions, "Before GROUP BY", result_actions_to_execute); + addExpressionStep(query_plan, aggregation_analysis_result.before_aggregation_actions, "Before GROUP BY", useful_sets); addAggregationStep(query_plan, aggregation_analysis_result, query_analysis_result, planner_context, select_query_info); } @@ -1598,7 +1565,7 @@ void Planner::buildPlanForQueryNode() */ const auto & window_analysis_result = expression_analysis_result.getWindow(); if (window_analysis_result.before_window_actions) - addExpressionStep(query_plan, window_analysis_result.before_window_actions, "Before WINDOW", result_actions_to_execute); + addExpressionStep(query_plan, window_analysis_result.before_window_actions, "Before WINDOW", useful_sets); } else { @@ -1607,7 +1574,7 @@ void Planner::buildPlanForQueryNode() * now, on shards (first_stage). 
*/ const auto & projection_analysis_result = expression_analysis_result.getProjection(); - addExpressionStep(query_plan, projection_analysis_result.projection_actions, "Projection", result_actions_to_execute); + addExpressionStep(query_plan, projection_analysis_result.projection_actions, "Projection", useful_sets); if (query_node.isDistinct()) { @@ -1623,7 +1590,7 @@ void Planner::buildPlanForQueryNode() if (expression_analysis_result.hasSort()) { const auto & sort_analysis_result = expression_analysis_result.getSort(); - addExpressionStep(query_plan, sort_analysis_result.before_order_by_actions, "Before ORDER BY", result_actions_to_execute); + addExpressionStep(query_plan, sort_analysis_result.before_order_by_actions, "Before ORDER BY", useful_sets); } } } @@ -1634,7 +1601,7 @@ void Planner::buildPlanForQueryNode() planner_context, query_processing_info, query_tree, - result_actions_to_execute); + useful_sets); } if (query_processing_info.isSecondStage() || query_processing_info.isFromAggregationState()) @@ -1656,14 +1623,14 @@ void Planner::buildPlanForQueryNode() if (query_node.isGroupByWithTotals()) { - addTotalsHavingStep(query_plan, expression_analysis_result, query_analysis_result, planner_context, query_node, result_actions_to_execute); + addTotalsHavingStep(query_plan, expression_analysis_result, query_analysis_result, planner_context, query_node, useful_sets); having_executed = true; } addCubeOrRollupStepIfNeeded(query_plan, aggregation_analysis_result, query_analysis_result, planner_context, select_query_info, query_node); if (!having_executed && expression_analysis_result.hasHaving()) - addFilterStep(query_plan, expression_analysis_result.getHaving(), "HAVING", result_actions_to_execute); + addFilterStep(query_plan, expression_analysis_result.getHaving(), "HAVING", useful_sets); } if (query_processing_info.isFromAggregationState()) @@ -1678,16 +1645,16 @@ void Planner::buildPlanForQueryNode() { const auto & window_analysis_result = expression_analysis_result.getWindow(); if (expression_analysis_result.hasAggregation()) - addExpressionStep(query_plan, window_analysis_result.before_window_actions, "Before window functions", result_actions_to_execute); + addExpressionStep(query_plan, window_analysis_result.before_window_actions, "Before window functions", useful_sets); addWindowSteps(query_plan, planner_context, window_analysis_result); } if (expression_analysis_result.hasQualify()) - addFilterStep(query_plan, expression_analysis_result.getQualify(), "QUALIFY", result_actions_to_execute); + addFilterStep(query_plan, expression_analysis_result.getQualify(), "QUALIFY", useful_sets); const auto & projection_analysis_result = expression_analysis_result.getProjection(); - addExpressionStep(query_plan, projection_analysis_result.projection_actions, "Projection", result_actions_to_execute); + addExpressionStep(query_plan, projection_analysis_result.projection_actions, "Projection", useful_sets); if (query_node.isDistinct()) { @@ -1703,7 +1670,7 @@ void Planner::buildPlanForQueryNode() if (expression_analysis_result.hasSort()) { const auto & sort_analysis_result = expression_analysis_result.getSort(); - addExpressionStep(query_plan, sort_analysis_result.before_order_by_actions, "Before ORDER BY", result_actions_to_execute); + addExpressionStep(query_plan, sort_analysis_result.before_order_by_actions, "Before ORDER BY", useful_sets); } } else @@ -1756,7 +1723,7 @@ void Planner::buildPlanForQueryNode() if (!query_processing_info.isFromAggregationState() && 
expression_analysis_result.hasLimitBy()) { const auto & limit_by_analysis_result = expression_analysis_result.getLimitBy(); - addExpressionStep(query_plan, limit_by_analysis_result.before_limit_by_actions, "Before LIMIT BY", result_actions_to_execute); + addExpressionStep(query_plan, limit_by_analysis_result.before_limit_by_actions, "Before LIMIT BY", useful_sets); addLimitByStep(query_plan, limit_by_analysis_result, query_node); } @@ -1788,7 +1755,7 @@ void Planner::buildPlanForQueryNode() if (!query_processing_info.isToAggregationState()) { const auto & projection_analysis_result = expression_analysis_result.getProjection(); - addExpressionStep(query_plan, projection_analysis_result.project_names_actions, "Project names", result_actions_to_execute); + addExpressionStep(query_plan, projection_analysis_result.project_names_actions, "Project names", useful_sets); } // For additional_result_filter setting @@ -1796,7 +1763,7 @@ void Planner::buildPlanForQueryNode() } if (!select_query_options.only_analyze) - addBuildSubqueriesForSetsStepIfNeeded(query_plan, select_query_options, planner_context, result_actions_to_execute); + addBuildSubqueriesForSetsStepIfNeeded(query_plan, select_query_options, planner_context, useful_sets); query_node_to_plan_step_mapping[&query_node] = query_plan.getRootNode(); } diff --git a/src/Planner/PlannerJoinTree.cpp b/src/Planner/PlannerJoinTree.cpp index 604d3366484..94054588d40 100644 --- a/src/Planner/PlannerJoinTree.cpp +++ b/src/Planner/PlannerJoinTree.cpp @@ -1181,13 +1181,13 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_ join_clauses_and_actions.left_join_expressions_actions->appendInputsForUnusedColumns(left_plan.getCurrentDataStream().header); auto left_join_expressions_actions_step = std::make_unique(left_plan.getCurrentDataStream(), join_clauses_and_actions.left_join_expressions_actions); left_join_expressions_actions_step->setStepDescription("JOIN actions"); - left_join_tree_query_plan.actions_dags.emplace_back(left_join_expressions_actions_step->getExpression().get()); + appendSetsFromActionsDAG(*left_join_expressions_actions_step->getExpression(), left_join_tree_query_plan.useful_sets); left_plan.addStep(std::move(left_join_expressions_actions_step)); join_clauses_and_actions.right_join_expressions_actions->appendInputsForUnusedColumns(right_plan.getCurrentDataStream().header); auto right_join_expressions_actions_step = std::make_unique(right_plan.getCurrentDataStream(), join_clauses_and_actions.right_join_expressions_actions); right_join_expressions_actions_step->setStepDescription("JOIN actions"); - right_join_tree_query_plan.actions_dags.emplace_back(right_join_expressions_actions_step->getExpression().get()); + appendSetsFromActionsDAG(*right_join_expressions_actions_step->getExpression(), right_join_tree_query_plan.useful_sets); right_plan.addStep(std::move(right_join_expressions_actions_step)); } @@ -1387,7 +1387,8 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_ mixed_join_expression = std::make_shared( std::move(join_clauses_and_actions.mixed_join_expressions_actions), ExpressionActionsSettings::fromContext(planner_context->getQueryContext())); - left_join_tree_query_plan.actions_dags.push_back(&mixed_join_expression->getActionsDAG()); + + appendSetsFromActionsDAG(mixed_join_expression->getActionsDAG(), left_join_tree_query_plan.useful_sets); } } else if (join_node.isUsingJoinExpression()) @@ -1585,16 +1586,10 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const 
QueryTreeNodePtr & join_table_ for (const auto & right_join_tree_query_plan_row_policy : right_join_tree_query_plan.used_row_policies) left_join_tree_query_plan.used_row_policies.insert(right_join_tree_query_plan_row_policy); - /// Collect all required actions dags in `left_join_tree_query_plan.actions_dags` + /// Collect all required actions sets in `left_join_tree_query_plan.useful_sets` if (!is_filled_join) - for (const auto * action_dag : right_join_tree_query_plan.actions_dags) - left_join_tree_query_plan.actions_dags.emplace_back(action_dag); - // if (join_clauses_and_actions.left_join_expressions_actions) - // left_join_tree_query_plan.actions_dags.emplace_back(join_clauses_and_actions.left_join_expressions_actions.get()); - // if (join_clauses_and_actions.right_join_expressions_actions) - // left_join_tree_query_plan.actions_dags.emplace_back(join_clauses_and_actions.right_join_expressions_actions.get()); - // if (join_clauses_and_actions.mixed_join_expressions_actions) - // left_join_tree_query_plan.actions_dags.push_back(join_clauses_and_actions.mixed_join_expressions_actions.get()); + for (const auto & useful_set : right_join_tree_query_plan.useful_sets) + left_join_tree_query_plan.useful_sets.insert(useful_set); auto mapping = std::move(left_join_tree_query_plan.query_node_to_plan_step_mapping); auto & r_mapping = right_join_tree_query_plan.query_node_to_plan_step_mapping; @@ -1604,7 +1599,7 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_ .query_plan = std::move(result_plan), .from_stage = QueryProcessingStage::FetchColumns, .used_row_policies = std::move(left_join_tree_query_plan.used_row_policies), - .actions_dags = std::move(left_join_tree_query_plan.actions_dags), + .useful_sets = std::move(left_join_tree_query_plan.useful_sets), .query_node_to_plan_step_mapping = std::move(mapping), }; } @@ -1649,7 +1644,7 @@ JoinTreeQueryPlan buildQueryPlanForArrayJoinNode(const QueryTreeNodePtr & array_ auto array_join_actions = std::make_unique(plan.getCurrentDataStream(), std::move(array_join_action_dag)); array_join_actions->setStepDescription("ARRAY JOIN actions"); - join_tree_query_plan.actions_dags.push_back(array_join_actions->getExpression().get()); + appendSetsFromActionsDAG(*array_join_actions->getExpression(), join_tree_query_plan.useful_sets); plan.addStep(std::move(array_join_actions)); auto drop_unused_columns_before_array_join_actions_dag = std::make_unique(plan.getCurrentDataStream().header.getColumnsWithTypeAndName()); @@ -1690,7 +1685,7 @@ JoinTreeQueryPlan buildQueryPlanForArrayJoinNode(const QueryTreeNodePtr & array_ .query_plan = std::move(plan), .from_stage = QueryProcessingStage::FetchColumns, .used_row_policies = std::move(join_tree_query_plan.used_row_policies), - .actions_dags = std::move(join_tree_query_plan.actions_dags), + .useful_sets = std::move(join_tree_query_plan.useful_sets), .query_node_to_plan_step_mapping = std::move(join_tree_query_plan.query_node_to_plan_step_mapping), }; } diff --git a/src/Planner/PlannerJoinTree.h b/src/Planner/PlannerJoinTree.h index 675079427eb..bc58e802a09 100644 --- a/src/Planner/PlannerJoinTree.h +++ b/src/Planner/PlannerJoinTree.h @@ -11,12 +11,14 @@ namespace DB { +using UsefulSets = std::unordered_set; + struct JoinTreeQueryPlan { QueryPlan query_plan; QueryProcessingStage::Enum from_stage; std::set used_row_policies{}; - std::vector actions_dags{}; + UsefulSets useful_sets; std::unordered_map query_node_to_plan_step_mapping{}; }; diff --git a/src/Planner/Utils.cpp 
b/src/Planner/Utils.cpp index 493ecf5ef53..7ac53e0f8c1 100644 --- a/src/Planner/Utils.cpp +++ b/src/Planner/Utils.cpp @@ -11,10 +11,12 @@ #include #include +#include #include #include +#include #include @@ -475,4 +477,32 @@ ASTPtr parseAdditionalResultFilter(const Settings & settings) return additional_result_filter_ast; } +void appendSetsFromActionsDAG(const ActionsDAG & dag, UsefulSets & useful_sets) +{ + for (const auto & node : dag.getNodes()) + { + if (node.column) + { + const IColumn * column = node.column.get(); + if (const auto * column_const = typeid_cast(column)) + column = &column_const->getDataColumn(); + + if (const auto * column_set = typeid_cast(column)) + useful_sets.insert(column_set->getData()); + } + + if (node.type == ActionsDAG::ActionType::FUNCTION && node.function_base->getName() == "indexHint") + { + ActionsDAG::NodeRawConstPtrs children; + if (const auto * adaptor = typeid_cast(node.function_base.get())) + { + if (const auto * index_hint = typeid_cast(adaptor->getFunction().get())) + { + appendSetsFromActionsDAG(*index_hint->getActions(), useful_sets); + } + } + } + } +} + } diff --git a/src/Planner/Utils.h b/src/Planner/Utils.h index 3172847f053..ae60976a8d6 100644 --- a/src/Planner/Utils.h +++ b/src/Planner/Utils.h @@ -88,4 +88,7 @@ FilterDAGInfo buildFilterInfo(QueryTreeNodePtr filter_query_tree, ASTPtr parseAdditionalResultFilter(const Settings & settings); +using UsefulSets = std::unordered_set; +void appendSetsFromActionsDAG(const ActionsDAG & dag, UsefulSets & useful_sets); + } From 5d16ba57aa84ef82ccf7e34a4635ad1d14e7859d Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Wed, 3 Jul 2024 14:46:49 +0000 Subject: [PATCH 022/661] Update version_date.tsv and changelogs after v24.3.5.46-lts --- docs/changelogs/v24.3.5.46-lts.md | 40 ++++++++++++++++++++++++++++ utils/list-versions/version_date.tsv | 1 + 2 files changed, 41 insertions(+) create mode 100644 docs/changelogs/v24.3.5.46-lts.md diff --git a/docs/changelogs/v24.3.5.46-lts.md b/docs/changelogs/v24.3.5.46-lts.md new file mode 100644 index 00000000000..1f2b7c8b0b7 --- /dev/null +++ b/docs/changelogs/v24.3.5.46-lts.md @@ -0,0 +1,40 @@ +--- +sidebar_position: 1 +sidebar_label: 2024 +--- + +# 2024 Changelog + +### ClickHouse release v24.3.5.46-lts (fe54cead6b6) FIXME as compared to v24.3.4.147-lts (31a7bdc346d) + +#### Improvement +* Backported in [#65463](https://github.com/ClickHouse/ClickHouse/issues/65463): Reload certificate chain during certificate reload. [#61671](https://github.com/ClickHouse/ClickHouse/pull/61671) ([Pervakov Grigorii](https://github.com/GrigoryPervakov)). +* Backported in [#65882](https://github.com/ClickHouse/ClickHouse/issues/65882): Always start Keeper with sufficient amount of threads in global thread pool. [#64444](https://github.com/ClickHouse/ClickHouse/pull/64444) ([Duc Canh Le](https://github.com/canhld94)). +* Backported in [#65302](https://github.com/ClickHouse/ClickHouse/issues/65302): Returned back the behaviour of how ClickHouse works and interprets Tuples in CSV format. This change effectively reverts https://github.com/ClickHouse/ClickHouse/pull/60994 and makes it available only under a few settings: `output_format_csv_serialize_tuple_into_separate_columns`, `input_format_csv_deserialize_separate_columns_into_tuple` and `input_format_csv_try_infer_strings_from_quoted_tuples`. [#65170](https://github.com/ClickHouse/ClickHouse/pull/65170) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). 
+* Backported in [#65892](https://github.com/ClickHouse/ClickHouse/issues/65892): Respect cgroup CPU limit in Keeper. [#65819](https://github.com/ClickHouse/ClickHouse/pull/65819) ([Antonio Andelic](https://github.com/antonio2368)). + +#### Critical Bug Fix (crash, LOGICAL_ERROR, data loss, RBAC) +* Backported in [#65283](https://github.com/ClickHouse/ClickHouse/issues/65283): Fix crash with UniqInjectiveFunctionsEliminationPass and uniqCombined. [#65188](https://github.com/ClickHouse/ClickHouse/pull/65188) ([Raúl Marín](https://github.com/Algunenano)). +* Backported in [#65370](https://github.com/ClickHouse/ClickHouse/issues/65370): Fix a bug in ClickHouse Keeper that causes digest mismatch during closing session. [#65198](https://github.com/ClickHouse/ClickHouse/pull/65198) ([Aleksei Filatov](https://github.com/aalexfvk)). +* Backported in [#65446](https://github.com/ClickHouse/ClickHouse/issues/65446): Use correct memory alignment for Distinct combinator. Previously, crash could happen because of invalid memory allocation when the combinator was used. [#65379](https://github.com/ClickHouse/ClickHouse/pull/65379) ([Antonio Andelic](https://github.com/antonio2368)). +* Backported in [#65708](https://github.com/ClickHouse/ClickHouse/issues/65708): Fix crash in maxIntersections. [#65689](https://github.com/ClickHouse/ClickHouse/pull/65689) ([Raúl Marín](https://github.com/Algunenano)). + +#### Bug Fix (user-visible misbehavior in an official stable release) +* Backported in [#65352](https://github.com/ClickHouse/ClickHouse/issues/65352): Fix possible abort on uncaught exception in ~WriteBufferFromFileDescriptor in StatusFile. [#64206](https://github.com/ClickHouse/ClickHouse/pull/64206) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#65327](https://github.com/ClickHouse/ClickHouse/issues/65327): Fix the crash loop when restoring from backup is blocked by creating an MV with a definer that hasn't been restored yet. [#64595](https://github.com/ClickHouse/ClickHouse/pull/64595) ([pufit](https://github.com/pufit)). +* Backported in [#65538](https://github.com/ClickHouse/ClickHouse/issues/65538): Fix crash for `ALTER TABLE ... ON CLUSTER ... MODIFY SQL SECURITY`. [#64957](https://github.com/ClickHouse/ClickHouse/pull/64957) ([pufit](https://github.com/pufit)). +* Backported in [#65576](https://github.com/ClickHouse/ClickHouse/issues/65576): Fix crash on destroying AccessControl: add explicit shutdown. [#64993](https://github.com/ClickHouse/ClickHouse/pull/64993) ([Vitaly Baranov](https://github.com/vitlibar)). +* Backported in [#65159](https://github.com/ClickHouse/ClickHouse/issues/65159): Fix pushing arithmetic operations out of aggregation. In the new analyzer, optimization was applied only once. [#65104](https://github.com/ClickHouse/ClickHouse/pull/65104) ([Dmitry Novik](https://github.com/novikd)). +* Backported in [#65615](https://github.com/ClickHouse/ClickHouse/issues/65615): Fix aggregate function name rewriting in the new analyzer. [#65110](https://github.com/ClickHouse/ClickHouse/pull/65110) ([Dmitry Novik](https://github.com/novikd)). +* Backported in [#65728](https://github.com/ClickHouse/ClickHouse/issues/65728): Eliminate injective function in argument of functions `uniq*` recursively. This used to work correctly but was broken in the new analyzer. [#65140](https://github.com/ClickHouse/ClickHouse/pull/65140) ([Duc Canh Le](https://github.com/canhld94)). 
+* Backported in [#65261](https://github.com/ClickHouse/ClickHouse/issues/65261): Fix the bug in Hashed and Hashed_Array dictionary short circuit evaluation, which may read uninitialized number, leading to various errors. [#65256](https://github.com/ClickHouse/ClickHouse/pull/65256) ([jsc0218](https://github.com/jsc0218)). +* Backported in [#65667](https://github.com/ClickHouse/ClickHouse/issues/65667): Disable `non-intersecting-parts` optimization for queries with `FINAL` in case of `read-in-order` optimization was enabled. This could lead to an incorrect query result. As a workaround, disable `do_not_merge_across_partitions_select_final` and `split_parts_ranges_into_intersecting_and_non_intersecting_final` before this fix is merged. [#65505](https://github.com/ClickHouse/ClickHouse/pull/65505) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#65784](https://github.com/ClickHouse/ClickHouse/issues/65784): Fixed bug in MergeJoin. Column in sparse serialisation might be treated as a column of its nested type though the required conversion wasn't performed. [#65632](https://github.com/ClickHouse/ClickHouse/pull/65632) ([Nikita Taranov](https://github.com/nickitat)). +* Backported in [#65929](https://github.com/ClickHouse/ClickHouse/issues/65929): For queries that read from `PostgreSQL`, cancel the internal `PostgreSQL` query if the ClickHouse query is finished. Otherwise, `ClickHouse` query cannot be canceled until the internal `PostgreSQL` query is finished. [#65771](https://github.com/ClickHouse/ClickHouse/pull/65771) ([Maksim Kita](https://github.com/kitaisreal)). +* Backported in [#65824](https://github.com/ClickHouse/ClickHouse/issues/65824): Fix a bug in short circuit logic when old analyzer and dictGetOrDefault is used. [#65802](https://github.com/ClickHouse/ClickHouse/pull/65802) ([jsc0218](https://github.com/jsc0218)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Backported in [#65223](https://github.com/ClickHouse/ClickHouse/issues/65223): Capture weak_ptr of ContextAccess for safety. [#65051](https://github.com/ClickHouse/ClickHouse/pull/65051) ([Alexander Gololobov](https://github.com/davenger)). +* Backported in [#65901](https://github.com/ClickHouse/ClickHouse/issues/65901): Fix bug with session closing in Keeper. [#65735](https://github.com/ClickHouse/ClickHouse/pull/65735) ([Antonio Andelic](https://github.com/antonio2368)). 
+ diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv index 8112ed9083b..8e748a2c2ca 100644 --- a/utils/list-versions/version_date.tsv +++ b/utils/list-versions/version_date.tsv @@ -6,6 +6,7 @@ v24.5.1.1763-stable 2024-06-01 v24.4.3.25-stable 2024-06-14 v24.4.2.141-stable 2024-06-07 v24.4.1.2088-stable 2024-05-01 +v24.3.5.46-lts 2024-07-03 v24.3.4.147-lts 2024-06-13 v24.3.3.102-lts 2024-05-01 v24.3.2.23-lts 2024-04-03 From 7843313f8e09eb018a481b4ba70fcf5fc147105e Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 5 Jul 2024 18:20:50 +0200 Subject: [PATCH 023/661] Update PlannerJoinTree.h --- src/Planner/PlannerJoinTree.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Planner/PlannerJoinTree.h b/src/Planner/PlannerJoinTree.h index bc58e802a09..259622b1d50 100644 --- a/src/Planner/PlannerJoinTree.h +++ b/src/Planner/PlannerJoinTree.h @@ -18,7 +18,7 @@ struct JoinTreeQueryPlan QueryPlan query_plan; QueryProcessingStage::Enum from_stage; std::set used_row_policies{}; - UsefulSets useful_sets; + UsefulSets useful_sets{}; std::unordered_map query_node_to_plan_step_mapping{}; }; From 2257f9a2aee5e8a5c5e178e5f7ccaf269018756a Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 5 Jul 2024 17:49:50 +0000 Subject: [PATCH 024/661] Change ActionsDAGPtr to ActionsDAG where possible. --- src/Core/InterpolateDescription.cpp | 6 +- src/Core/InterpolateDescription.h | 7 +- src/Functions/indexHint.h | 6 +- src/Interpreters/ActionsDAG.cpp | 148 +++++++++--------- src/Interpreters/ActionsDAG.h | 46 +++--- src/Interpreters/ActionsVisitor.cpp | 4 +- src/Interpreters/ExpressionActions.cpp | 19 ++- src/Interpreters/ExpressionActions.h | 8 +- src/Interpreters/ExpressionAnalyzer.cpp | 51 +++--- src/Interpreters/ExpressionAnalyzer.h | 6 +- src/Interpreters/GlobalSubqueriesVisitor.h | 2 +- .../IInterpreterUnionOrSelectQuery.cpp | 12 +- src/Interpreters/InterpreterSelectQuery.cpp | 49 +++--- src/Interpreters/InterpreterSelectQuery.h | 2 +- src/Interpreters/MutationsInterpreter.cpp | 14 +- .../MySQL/InterpretersMySQLDDLQuery.cpp | 2 +- src/Interpreters/TableJoin.cpp | 49 +++--- src/Interpreters/TableJoin.h | 10 +- src/Interpreters/addMissingDefaults.cpp | 24 +-- src/Interpreters/addMissingDefaults.h | 2 +- .../evaluateConstantExpression.cpp | 2 +- src/Interpreters/inplaceBlockConversions.cpp | 16 +- src/Interpreters/inplaceBlockConversions.h | 6 +- src/Planner/Planner.cpp | 46 +++--- src/Planner/PlannerActionsVisitor.cpp | 14 +- src/Planner/PlannerContext.h | 2 +- src/Planner/PlannerJoinTree.cpp | 81 +++++----- src/Planner/PlannerJoins.cpp | 56 +++---- src/Planner/PlannerJoins.h | 10 +- src/Planner/Utils.cpp | 10 +- src/Processors/QueryPlan/AggregatingStep.cpp | 16 +- src/Processors/QueryPlan/CubeStep.cpp | 12 +- .../QueryPlan/DistributedCreateLocalPlan.cpp | 2 +- src/Processors/QueryPlan/ExpressionStep.cpp | 22 +-- src/Processors/QueryPlan/ExpressionStep.h | 11 +- src/Processors/QueryPlan/FilterStep.cpp | 20 +-- src/Processors/QueryPlan/FilterStep.h | 12 +- .../convertOuterJoinToInnerJoin.cpp | 4 +- .../Optimizations/distinctReadInOrder.cpp | 4 +- .../Optimizations/filterPushDown.cpp | 50 +++--- .../Optimizations/liftUpArrayJoin.cpp | 8 +- .../Optimizations/liftUpFunctions.cpp | 6 +- .../QueryPlan/Optimizations/liftUpUnion.cpp | 2 +- .../Optimizations/mergeExpressions.cpp | 45 +++--- .../Optimizations/optimizePrewhere.cpp | 19 +-- .../optimizePrimaryKeyConditionAndLimit.cpp | 6 +- .../Optimizations/optimizeReadInOrder.cpp | 14 +- 
.../optimizeUseAggregateProjection.cpp | 30 ++-- .../optimizeUseNormalProjection.cpp | 22 +-- .../Optimizations/projectionsCommon.cpp | 14 +- .../Optimizations/projectionsCommon.h | 4 +- .../Optimizations/removeRedundantDistinct.cpp | 12 +- .../Optimizations/removeRedundantSorting.cpp | 4 +- .../QueryPlan/Optimizations/splitFilter.cpp | 14 +- .../useDataParallelAggregation.cpp | 6 +- src/Processors/QueryPlan/PartsSplitter.cpp | 2 +- .../QueryPlan/ReadFromMergeTree.cpp | 39 ++--- src/Processors/QueryPlan/ReadFromMergeTree.h | 8 +- .../QueryPlan/ReadFromSystemNumbersStep.cpp | 2 +- .../QueryPlan/SourceStepWithFilter.cpp | 8 +- .../QueryPlan/SourceStepWithFilter.h | 6 +- src/Processors/QueryPlan/TotalsHavingStep.cpp | 14 +- src/Processors/QueryPlan/TotalsHavingStep.h | 7 +- src/Processors/SourceWithKeyCondition.h | 8 +- .../Transforms/AddingDefaultsTransform.cpp | 2 +- .../Transforms/FillingTransform.cpp | 2 +- src/Storages/Hive/StorageHive.cpp | 24 +-- src/Storages/Hive/StorageHive.h | 10 +- src/Storages/IStorage.cpp | 2 +- src/Storages/IStorage.h | 4 +- src/Storages/KVStorageUtils.cpp | 2 +- src/Storages/KVStorageUtils.h | 2 +- src/Storages/KeyDescription.cpp | 2 +- src/Storages/MergeTree/IMergeTreeReader.cpp | 4 +- src/Storages/MergeTree/KeyCondition.cpp | 4 +- src/Storages/MergeTree/MergeTreeData.cpp | 10 +- src/Storages/MergeTree/MergeTreeData.h | 4 +- .../MergeTree/MergeTreeDataSelectExecutor.cpp | 4 +- src/Storages/MergeTree/MergeTreeIndexSet.cpp | 38 ++--- src/Storages/MergeTree/MergeTreeIndexSet.h | 6 +- .../MergeTree/MergeTreeSelectProcessor.cpp | 4 +- .../MergeTree/MergeTreeSequentialSource.cpp | 8 +- .../MergeTree/MergeTreeSequentialSource.h | 2 +- .../MergeTreeSplitPrewhereIntoReadSteps.cpp | 2 +- .../MergeTree/MergeTreeWhereOptimizer.cpp | 4 +- .../MergeTree/MergeTreeWhereOptimizer.h | 2 +- src/Storages/MergeTree/RPNBuilder.cpp | 4 +- .../StorageObjectStorageSource.cpp | 4 +- .../StorageObjectStorageSource.h | 2 +- .../ReadFinalForExternalReplicaStorage.cpp | 2 +- src/Storages/SelectQueryInfo.h | 12 +- src/Storages/StorageBuffer.cpp | 12 +- src/Storages/StorageDistributed.cpp | 4 +- src/Storages/StorageFile.cpp | 4 +- src/Storages/StorageFile.h | 2 +- src/Storages/StorageMaterializedView.cpp | 4 +- src/Storages/StorageMerge.cpp | 30 ++-- src/Storages/StorageMerge.h | 2 +- src/Storages/StorageMergeTree.cpp | 2 +- src/Storages/StorageMergeTree.h | 2 +- src/Storages/StorageReplicatedMergeTree.cpp | 2 +- src/Storages/StorageReplicatedMergeTree.h | 2 +- src/Storages/StorageTableFunction.h | 2 +- src/Storages/StorageURL.h | 4 +- src/Storages/StorageValues.cpp | 6 +- src/Storages/StorageView.cpp | 6 +- .../System/StorageSystemStackTrace.cpp | 4 +- src/Storages/TTLDescription.cpp | 8 +- src/Storages/VirtualColumnUtils.cpp | 12 +- src/Storages/WindowView/StorageWindowView.cpp | 2 +- 110 files changed, 718 insertions(+), 721 deletions(-) diff --git a/src/Core/InterpolateDescription.cpp b/src/Core/InterpolateDescription.cpp index 76bbefdcfd7..86681fdb591 100644 --- a/src/Core/InterpolateDescription.cpp +++ b/src/Core/InterpolateDescription.cpp @@ -13,10 +13,10 @@ namespace DB { - InterpolateDescription::InterpolateDescription(ActionsDAGPtr actions_, const Aliases & aliases) + InterpolateDescription::InterpolateDescription(ActionsDAG actions_, const Aliases & aliases) : actions(std::move(actions_)) { - for (const auto & name_type : actions->getRequiredColumns()) + for (const auto & name_type : actions.getRequiredColumns()) { if (const auto & p = aliases.find(name_type.name); p != 
aliases.end()) required_columns_map[p->second->getColumnName()] = name_type; @@ -24,7 +24,7 @@ namespace DB required_columns_map[name_type.name] = name_type; } - for (const ColumnWithTypeAndName & column : actions->getResultColumns()) + for (const ColumnWithTypeAndName & column : actions.getResultColumns()) { std::string name = column.name; if (const auto & p = aliases.find(name); p != aliases.end()) diff --git a/src/Core/InterpolateDescription.h b/src/Core/InterpolateDescription.h index 73579aebee4..eeead71d780 100644 --- a/src/Core/InterpolateDescription.h +++ b/src/Core/InterpolateDescription.h @@ -5,21 +5,20 @@ #include #include #include +#include namespace DB { -class ActionsDAG; -using ActionsDAGPtr = std::unique_ptr; using Aliases = std::unordered_map; /// Interpolate description struct InterpolateDescription { - explicit InterpolateDescription(ActionsDAGPtr actions, const Aliases & aliases); + explicit InterpolateDescription(ActionsDAG actions, const Aliases & aliases); - ActionsDAGPtr actions; + ActionsDAG actions; std::unordered_map required_columns_map; /// input column name -> {alias, type} std::unordered_set result_columns_set; /// result block columns diff --git a/src/Functions/indexHint.h b/src/Functions/indexHint.h index 8fd7b751760..3ab8a021ae1 100644 --- a/src/Functions/indexHint.h +++ b/src/Functions/indexHint.h @@ -58,11 +58,11 @@ public: return DataTypeUInt8().createColumnConst(input_rows_count, 1u); } - void setActions(ActionsDAGPtr actions_) { actions = std::move(actions_); } - const ActionsDAGPtr & getActions() const { return actions; } + void setActions(ActionsDAG actions_) { actions = std::move(actions_); } + const ActionsDAG & getActions() const { return actions; } private: - ActionsDAGPtr actions; + ActionsDAG actions; }; } diff --git a/src/Interpreters/ActionsDAG.cpp b/src/Interpreters/ActionsDAG.cpp index c2626285235..04be9d23c32 100644 --- a/src/Interpreters/ActionsDAG.cpp +++ b/src/Interpreters/ActionsDAG.cpp @@ -961,9 +961,9 @@ NameSet ActionsDAG::foldActionsByProjection( } -ActionsDAGPtr ActionsDAG::foldActionsByProjection(const std::unordered_map & new_inputs, const NodeRawConstPtrs & required_outputs) +ActionsDAG ActionsDAG::foldActionsByProjection(const std::unordered_map & new_inputs, const NodeRawConstPtrs & required_outputs) { - auto dag = std::make_unique(); + ActionsDAG dag; std::unordered_map inputs_mapping; std::unordered_map mapping; struct Frame @@ -1003,9 +1003,9 @@ ActionsDAGPtr ActionsDAG::foldActionsByProjection(const std::unordered_mapresult_name != rename->result_name; const auto & input_name = should_rename ? 
rename->result_name : new_input->result_name; - mapped_input = &dag->addInput(input_name, new_input->result_type); + mapped_input = &dag.addInput(input_name, new_input->result_type); if (should_rename) - mapped_input = &dag->addAlias(*mapped_input, new_input->result_name); + mapped_input = &dag.addAlias(*mapped_input, new_input->result_name); } node = mapped_input; @@ -1034,7 +1034,7 @@ ActionsDAGPtr ActionsDAG::foldActionsByProjection(const std::unordered_mapresult_name, frame.node->result_name); - auto & node = dag->nodes.emplace_back(*frame.node); + auto & node = dag.nodes.emplace_back(*frame.node); for (auto & child : node.children) child = mapping[child]; @@ -1049,8 +1049,8 @@ ActionsDAGPtr ActionsDAG::foldActionsByProjection(const std::unordered_mapresult_name != mapped_output->result_name) - mapped_output = &dag->addAlias(*mapped_output, output->result_name); - dag->outputs.push_back(mapped_output); + mapped_output = &dag.addAlias(*mapped_output, output->result_name); + dag.outputs.push_back(mapped_output); } return dag; @@ -1411,7 +1411,7 @@ const ActionsDAG::Node & ActionsDAG::materializeNode(const Node & node) return addAlias(*func, name); } -ActionsDAGPtr ActionsDAG::makeConvertingActions( +ActionsDAG ActionsDAG::makeConvertingActions( const ColumnsWithTypeAndName & source, const ColumnsWithTypeAndName & result, MatchColumnsMode mode, @@ -1428,7 +1428,7 @@ ActionsDAGPtr ActionsDAG::makeConvertingActions( if (add_casted_columns && mode != MatchColumnsMode::Name) throw Exception(ErrorCodes::LOGICAL_ERROR, "Converting with add_casted_columns supported only for MatchColumnsMode::Name"); - auto actions_dag = std::make_unique(source); + ActionsDAG actions_dag(source); NodeRawConstPtrs projection(num_result_columns); FunctionOverloadResolverPtr func_builder_materialize = std::make_unique(std::make_shared()); @@ -1436,9 +1436,9 @@ ActionsDAGPtr ActionsDAG::makeConvertingActions( std::unordered_map> inputs; if (mode == MatchColumnsMode::Name) { - size_t input_nodes_size = actions_dag->inputs.size(); + size_t input_nodes_size = actions_dag.inputs.size(); for (size_t pos = 0; pos < input_nodes_size; ++pos) - inputs[actions_dag->inputs[pos]->result_name].push_back(pos); + inputs[actions_dag.inputs[pos]->result_name].push_back(pos); } for (size_t result_col_num = 0; result_col_num < num_result_columns; ++result_col_num) @@ -1451,7 +1451,7 @@ ActionsDAGPtr ActionsDAG::makeConvertingActions( { case MatchColumnsMode::Position: { - src_node = dst_node = actions_dag->inputs[result_col_num]; + src_node = dst_node = actions_dag.inputs[result_col_num]; break; } @@ -1462,7 +1462,7 @@ ActionsDAGPtr ActionsDAG::makeConvertingActions( { const auto * res_const = typeid_cast(res_elem.column.get()); if (ignore_constant_values && res_const) - src_node = dst_node = &actions_dag->addColumn(res_elem); + src_node = dst_node = &actions_dag.addColumn(res_elem); else throw Exception(ErrorCodes::THERE_IS_NO_COLUMN, "Cannot find column `{}` in source stream, there are only columns: [{}]", @@ -1470,7 +1470,7 @@ ActionsDAGPtr ActionsDAG::makeConvertingActions( } else { - src_node = dst_node = actions_dag->inputs[input.front()]; + src_node = dst_node = actions_dag.inputs[input.front()]; input.pop_front(); } break; @@ -1483,7 +1483,7 @@ ActionsDAGPtr ActionsDAG::makeConvertingActions( if (const auto * src_const = typeid_cast(dst_node->column.get())) { if (ignore_constant_values) - dst_node = &actions_dag->addColumn(res_elem); + dst_node = &actions_dag.addColumn(res_elem); else if (res_const->getField() != 
src_const->getField()) throw Exception( ErrorCodes::ILLEGAL_COLUMN, @@ -1505,7 +1505,7 @@ ActionsDAGPtr ActionsDAG::makeConvertingActions( column.column = DataTypeString().createColumnConst(0, column.name); column.type = std::make_shared(); - const auto * right_arg = &actions_dag->addColumn(std::move(column)); + const auto * right_arg = &actions_dag.addColumn(std::move(column)); const auto * left_arg = dst_node; CastDiagnostic diagnostic = {dst_node->result_name, res_elem.name}; @@ -1513,13 +1513,13 @@ ActionsDAGPtr ActionsDAG::makeConvertingActions( = createInternalCastOverloadResolver(CastType::nonAccurate, std::move(diagnostic)); NodeRawConstPtrs children = { left_arg, right_arg }; - dst_node = &actions_dag->addFunction(func_builder_cast, std::move(children), {}); + dst_node = &actions_dag.addFunction(func_builder_cast, std::move(children), {}); } if (dst_node->column && isColumnConst(*dst_node->column) && !(res_elem.column && isColumnConst(*res_elem.column))) { NodeRawConstPtrs children = {dst_node}; - dst_node = &actions_dag->addFunction(func_builder_materialize, std::move(children), {}); + dst_node = &actions_dag.addFunction(func_builder_materialize, std::move(children), {}); } if (dst_node->result_name != res_elem.name) @@ -1538,7 +1538,7 @@ ActionsDAGPtr ActionsDAG::makeConvertingActions( } else { - dst_node = &actions_dag->addAlias(*dst_node, res_elem.name); + dst_node = &actions_dag.addAlias(*dst_node, res_elem.name); projection[result_col_num] = dst_node; } } @@ -1548,36 +1548,36 @@ ActionsDAGPtr ActionsDAG::makeConvertingActions( } } - actions_dag->outputs.swap(projection); - actions_dag->removeUnusedActions(false); + actions_dag.outputs.swap(projection); + actions_dag.removeUnusedActions(false); return actions_dag; } -ActionsDAGPtr ActionsDAG::makeAddingColumnActions(ColumnWithTypeAndName column) +ActionsDAG ActionsDAG::makeAddingColumnActions(ColumnWithTypeAndName column) { - auto adding_column_action = std::make_unique(); + ActionsDAG adding_column_action; FunctionOverloadResolverPtr func_builder_materialize = std::make_unique(std::make_shared()); auto column_name = column.name; - const auto * column_node = &adding_column_action->addColumn(std::move(column)); + const auto * column_node = &adding_column_action.addColumn(std::move(column)); NodeRawConstPtrs inputs = {column_node}; - const auto & function_node = adding_column_action->addFunction(func_builder_materialize, std::move(inputs), {}); - const auto & alias_node = adding_column_action->addAlias(function_node, std::move(column_name)); + const auto & function_node = adding_column_action.addFunction(func_builder_materialize, std::move(inputs), {}); + const auto & alias_node = adding_column_action.addAlias(function_node, std::move(column_name)); - adding_column_action->outputs.push_back(&alias_node); + adding_column_action.outputs.push_back(&alias_node); return adding_column_action; } -ActionsDAGPtr ActionsDAG::merge(ActionsDAG && first, ActionsDAG && second) +ActionsDAG ActionsDAG::merge(ActionsDAG && first, ActionsDAG && second) { first.mergeInplace(std::move(second)); /// Some actions could become unused. Do not drop inputs to preserve the header. 
first.removeUnusedActions(false); - return std::make_unique(std::move(first)); + return std::move(first); } void ActionsDAG::mergeInplace(ActionsDAG && second) @@ -1970,15 +1970,15 @@ ActionsDAG::SplitResult ActionsDAG::split(std::unordered_set split second_inputs.push_back(cur.to_second); } - auto first_actions = std::make_unique(); - first_actions->nodes.swap(first_nodes); - first_actions->outputs.swap(first_outputs); - first_actions->inputs.swap(first_inputs); + ActionsDAG first_actions; + first_actions.nodes.swap(first_nodes); + first_actions.outputs.swap(first_outputs); + first_actions.inputs.swap(first_inputs); - auto second_actions = std::make_unique(); - second_actions->nodes.swap(second_nodes); - second_actions->outputs.swap(second_outputs); - second_actions->inputs.swap(second_inputs); + ActionsDAG second_actions; + second_actions.nodes.swap(second_nodes); + second_actions.outputs.swap(second_outputs); + second_actions.inputs.swap(second_inputs); std::unordered_map split_nodes_mapping; if (create_split_nodes_mapping) @@ -2098,7 +2098,7 @@ ActionsDAG::SplitResult ActionsDAG::splitActionsBySortingDescription(const NameS return res; } -bool ActionsDAG::isFilterAlwaysFalseForDefaultValueInputs(const std::string & filter_name, const Block & input_stream_header) +bool ActionsDAG::isFilterAlwaysFalseForDefaultValueInputs(const std::string & filter_name, const Block & input_stream_header) const { const auto * filter_node = tryFindInOutputs(filter_name); if (!filter_node) @@ -2122,7 +2122,7 @@ bool ActionsDAG::isFilterAlwaysFalseForDefaultValueInputs(const std::string & fi input_node_name_to_default_input_column.emplace(input->result_name, std::move(constant_column_with_type_and_name)); } - ActionsDAGPtr filter_with_default_value_inputs; + std::optional filter_with_default_value_inputs; try { @@ -2304,12 +2304,12 @@ ColumnsWithTypeAndName prepareFunctionArguments(const ActionsDAG::NodeRawConstPt /// /// Result actions add single column with conjunction result (it is always first in outputs). /// No other columns are added or removed. 
-ActionsDAGPtr ActionsDAG::createActionsForConjunction(NodeRawConstPtrs conjunction, const ColumnsWithTypeAndName & all_inputs) +std::optional ActionsDAG::createActionsForConjunction(NodeRawConstPtrs conjunction, const ColumnsWithTypeAndName & all_inputs) { if (conjunction.empty()) - return nullptr; + return {}; - auto actions = std::make_unique(); + ActionsDAG actions; FunctionOverloadResolverPtr func_builder_and = std::make_unique(std::make_shared()); @@ -2350,7 +2350,7 @@ ActionsDAGPtr ActionsDAG::createActionsForConjunction(NodeRawConstPtrs conjuncti if (cur.next_child_to_visit == cur.node->children.size()) { - auto & node = actions->nodes.emplace_back(*cur.node); + auto & node = actions.nodes.emplace_back(*cur.node); nodes_mapping[cur.node] = &node; for (auto & child : node.children) @@ -2373,33 +2373,33 @@ ActionsDAGPtr ActionsDAG::createActionsForConjunction(NodeRawConstPtrs conjuncti for (const auto * predicate : conjunction) args.emplace_back(nodes_mapping[predicate]); - result_predicate = &actions->addFunction(func_builder_and, std::move(args), {}); + result_predicate = &actions.addFunction(func_builder_and, std::move(args), {}); } - actions->outputs.push_back(result_predicate); + actions.outputs.push_back(result_predicate); for (const auto & col : all_inputs) { const Node * input; auto & list = required_inputs[col.name]; if (list.empty()) - input = &actions->addInput(col); + input = &actions.addInput(col); else { input = list.front(); list.pop_front(); - actions->inputs.push_back(input); + actions.inputs.push_back(input); } /// We should not add result_predicate into the outputs for the second time. if (input->result_name != result_predicate->result_name) - actions->outputs.push_back(input); + actions.outputs.push_back(input); } return actions; } -ActionsDAGPtr ActionsDAG::splitActionsForFilterPushDown( +std::optional ActionsDAG::splitActionsForFilterPushDown( const std::string & filter_name, bool removes_filter, const Names & available_inputs, @@ -2415,7 +2415,7 @@ ActionsDAGPtr ActionsDAG::splitActionsForFilterPushDown( /// If condition is constant let's do nothing. /// It means there is nothing to push down or optimization was already applied. if (predicate->type == ActionType::COLUMN) - return nullptr; + return {}; std::unordered_set allowed_nodes; @@ -2439,7 +2439,7 @@ ActionsDAGPtr ActionsDAG::splitActionsForFilterPushDown( auto conjunction = getConjunctionNodes(predicate, allowed_nodes); if (conjunction.allowed.empty()) - return nullptr; + return {}; chassert(predicate->result_type); @@ -2451,13 +2451,13 @@ ActionsDAGPtr ActionsDAG::splitActionsForFilterPushDown( && !conjunction.rejected.front()->result_type->equals(*predicate->result_type)) { /// No further optimization can be done - return nullptr; + return {}; } } auto actions = createActionsForConjunction(conjunction.allowed, all_inputs); if (!actions) - return nullptr; + return {}; /// Now, when actions are created, update the current DAG. 
removeUnusedConjunctions(std::move(conjunction.rejected), predicate, removes_filter); @@ -2562,11 +2562,11 @@ ActionsDAG::ActionsForJOINFilterPushDown ActionsDAG::splitActionsForJOINFilterPu auto left_stream_filter_to_push_down = createActionsForConjunction(left_stream_allowed_conjunctions, left_stream_header.getColumnsWithTypeAndName()); auto right_stream_filter_to_push_down = createActionsForConjunction(right_stream_allowed_conjunctions, right_stream_header.getColumnsWithTypeAndName()); - auto replace_equivalent_columns_in_filter = [](const ActionsDAGPtr & filter, + auto replace_equivalent_columns_in_filter = [](const ActionsDAG & filter, const Block & stream_header, const std::unordered_map & columns_to_replace) { - auto updated_filter = ActionsDAG::buildFilterActionsDAG({filter->getOutputs()[0]}, columns_to_replace); + auto updated_filter = ActionsDAG::buildFilterActionsDAG({filter.getOutputs()[0]}, columns_to_replace); chassert(updated_filter->getOutputs().size() == 1); /** If result filter to left or right stream has column that is one of the stream inputs, we need distinguish filter column from @@ -2587,7 +2587,7 @@ ActionsDAG::ActionsForJOINFilterPushDown ActionsDAG::splitActionsForJOINFilterPu for (const auto & input : updated_filter->getInputs()) updated_filter_inputs[input->result_name].push_back(input); - for (const auto & input : filter->getInputs()) + for (const auto & input : filter.getInputs()) { if (updated_filter_inputs.contains(input->result_name)) continue; @@ -2625,12 +2625,12 @@ ActionsDAG::ActionsForJOINFilterPushDown ActionsDAG::splitActionsForJOINFilterPu }; if (left_stream_filter_to_push_down) - left_stream_filter_to_push_down = replace_equivalent_columns_in_filter(left_stream_filter_to_push_down, + left_stream_filter_to_push_down = replace_equivalent_columns_in_filter(*left_stream_filter_to_push_down, left_stream_header, equivalent_right_stream_column_to_left_stream_column); if (right_stream_filter_to_push_down) - right_stream_filter_to_push_down = replace_equivalent_columns_in_filter(right_stream_filter_to_push_down, + right_stream_filter_to_push_down = replace_equivalent_columns_in_filter(*right_stream_filter_to_push_down, right_stream_header, equivalent_left_stream_column_to_right_stream_column); @@ -2859,13 +2859,13 @@ bool ActionsDAG::isSortingPreserved( return true; } -ActionsDAGPtr ActionsDAG::buildFilterActionsDAG( +std::optional ActionsDAG::buildFilterActionsDAG( const NodeRawConstPtrs & filter_nodes, const std::unordered_map & node_name_to_input_node_column, bool single_output_condition_node) { if (filter_nodes.empty()) - return nullptr; + return {}; struct Frame { @@ -2873,7 +2873,7 @@ ActionsDAGPtr ActionsDAG::buildFilterActionsDAG( bool visited_children = false; }; - auto result_dag = std::make_unique(); + ActionsDAG result_dag; std::unordered_map result_inputs; std::unordered_map node_to_result_node; @@ -2904,7 +2904,7 @@ ActionsDAGPtr ActionsDAG::buildFilterActionsDAG( { auto & result_input = result_inputs[input_node_it->second.name]; if (!result_input) - result_input = &result_dag->addInput(input_node_it->second); + result_input = &result_dag.addInput(input_node_it->second); node_to_result_node.emplace(node, result_input); nodes_to_process.pop_back(); @@ -2931,25 +2931,25 @@ ActionsDAGPtr ActionsDAG::buildFilterActionsDAG( { auto & result_input = result_inputs[node->result_name]; if (!result_input) - result_input = &result_dag->addInput({node->column, node->result_type, node->result_name}); + result_input = &result_dag.addInput({node->column, 
node->result_type, node->result_name}); result_node = result_input; break; } case ActionsDAG::ActionType::COLUMN: { - result_node = &result_dag->addColumn({node->column, node->result_type, node->result_name}); + result_node = &result_dag.addColumn({node->column, node->result_type, node->result_name}); break; } case ActionsDAG::ActionType::ALIAS: { const auto * child = node->children.front(); - result_node = &result_dag->addAlias(*(node_to_result_node.find(child)->second), node->result_name); + result_node = &result_dag.addAlias(*(node_to_result_node.find(child)->second), node->result_name); break; } case ActionsDAG::ActionType::ARRAY_JOIN: { const auto * child = node->children.front(); - result_node = &result_dag->addArrayJoin(*(node_to_result_node.find(child)->second), {}); + result_node = &result_dag.addArrayJoin(*(node_to_result_node.find(child)->second), {}); break; } case ActionsDAG::ActionType::FUNCTION: @@ -2967,13 +2967,11 @@ ActionsDAGPtr ActionsDAG::buildFilterActionsDAG( { if (const auto * index_hint = typeid_cast(adaptor->getFunction().get())) { - ActionsDAGPtr index_hint_filter_dag; - const auto & index_hint_args = index_hint->getActions()->getOutputs(); + ActionsDAG index_hint_filter_dag; + const auto & index_hint_args = index_hint->getActions().getOutputs(); - if (index_hint_args.empty()) - index_hint_filter_dag = std::make_unique(); - else - index_hint_filter_dag = buildFilterActionsDAG(index_hint_args, + if (!index_hint_args.empty()) + index_hint_filter_dag = *buildFilterActionsDAG(index_hint_args, node_name_to_input_node_column, false /*single_output_condition_node*/); @@ -2995,7 +2993,7 @@ ActionsDAGPtr ActionsDAG::buildFilterActionsDAG( auto [arguments, all_const] = getFunctionArguments(function_children); auto function_base = function_overload_resolver ? function_overload_resolver->build(arguments) : node->function_base; - result_node = &result_dag->addFunctionImpl( + result_node = &result_dag.addFunctionImpl( function_base, std::move(function_children), std::move(arguments), @@ -3010,7 +3008,7 @@ ActionsDAGPtr ActionsDAG::buildFilterActionsDAG( nodes_to_process.pop_back(); } - auto & result_dag_outputs = result_dag->getOutputs(); + auto & result_dag_outputs = result_dag.getOutputs(); result_dag_outputs.reserve(filter_nodes_size); for (const auto & node : filter_nodes) @@ -3019,7 +3017,7 @@ ActionsDAGPtr ActionsDAG::buildFilterActionsDAG( if (result_dag_outputs.size() > 1 && single_output_condition_node) { FunctionOverloadResolverPtr func_builder_and = std::make_unique(std::make_shared()); - result_dag_outputs = { &result_dag->addFunction(func_builder_and, result_dag_outputs, {}) }; + result_dag_outputs = { &result_dag.addFunction(func_builder_and, result_dag_outputs, {}) }; } return result_dag; diff --git a/src/Interpreters/ActionsDAG.h b/src/Interpreters/ActionsDAG.h index 7ca3d1c1b0d..cf6a91b9fe7 100644 --- a/src/Interpreters/ActionsDAG.h +++ b/src/Interpreters/ActionsDAG.h @@ -247,7 +247,7 @@ public: /// c * d e /// \ / /// c * d - e - static ActionsDAGPtr foldActionsByProjection( + static ActionsDAG foldActionsByProjection( const std::unordered_map & new_inputs, const NodeRawConstPtrs & required_outputs); @@ -303,7 +303,7 @@ public: /// @param ignore_constant_values - Do not check that constants are same. Use value from result_header. /// @param add_casted_columns - Create new columns with converted values instead of replacing original. /// @param new_names - Output parameter for new column names when add_casted_columns is used. 
- static ActionsDAGPtr makeConvertingActions( + static ActionsDAG makeConvertingActions( const ColumnsWithTypeAndName & source, const ColumnsWithTypeAndName & result, MatchColumnsMode mode, @@ -312,13 +312,13 @@ public: NameToNameMap * new_names = nullptr); /// Create expression which add const column and then materialize it. - static ActionsDAGPtr makeAddingColumnActions(ColumnWithTypeAndName column); + static ActionsDAG makeAddingColumnActions(ColumnWithTypeAndName column); /// Create ActionsDAG which represents expression equivalent to applying first and second actions consequently. /// Is used to replace `(first -> second)` expression chain to single `merge(first, second)` expression. /// If first.settings.project_input is set, then outputs of `first` must include inputs of `second`. /// Otherwise, any two actions may be combined. - static ActionsDAGPtr merge(ActionsDAG && first, ActionsDAG && second); + static ActionsDAG merge(ActionsDAG && first, ActionsDAG && second); /// The result is similar to merge(*this, second); /// Invariant : no nodes are removed from the first (this) DAG. @@ -329,12 +329,7 @@ public: /// *out_outputs is filled with pointers to the nodes corresponding to second.getOutputs(). void mergeNodes(ActionsDAG && second, NodeRawConstPtrs * out_outputs = nullptr); - struct SplitResult - { - ActionsDAGPtr first; - ActionsDAGPtr second; - std::unordered_map split_nodes_mapping; - }; + struct SplitResult; /// Split ActionsDAG into two DAGs, where first part contains all nodes from split_nodes and their children. /// Execution of first then second parts on block is equivalent to execution of initial DAG. @@ -362,7 +357,7 @@ public: * @param filter_name - name of filter node in current DAG. * @param input_stream_header - input stream header. */ - bool isFilterAlwaysFalseForDefaultValueInputs(const std::string & filter_name, const Block & input_stream_header); + bool isFilterAlwaysFalseForDefaultValueInputs(const std::string & filter_name, const Block & input_stream_header) const; /// Create actions which may calculate part of filter using only available_inputs. /// If nothing may be calculated, returns nullptr. @@ -381,19 +376,13 @@ public: /// columns will be transformed like `x, y, z` -> `z > 0, z, x, y` -(remove filter)-> `z, x, y`. /// To avoid it, add inputs from `all_inputs` list, /// so actions `x, y, z -> z > 0, x, y, z` -(remove filter)-> `x, y, z` will not change columns order. - ActionsDAGPtr splitActionsForFilterPushDown( + std::optional splitActionsForFilterPushDown( const std::string & filter_name, bool removes_filter, const Names & available_inputs, const ColumnsWithTypeAndName & all_inputs); - struct ActionsForJOINFilterPushDown - { - ActionsDAGPtr left_stream_filter_to_push_down; - bool left_stream_filter_removes_filter; - ActionsDAGPtr right_stream_filter_to_push_down; - bool right_stream_filter_removes_filter; - }; + struct ActionsForJOINFilterPushDown; /** Split actions for JOIN filter push down. * @@ -440,7 +429,7 @@ public: * * If single_output_condition_node = false, result dag has multiple output nodes. 
*/ - static ActionsDAGPtr buildFilterActionsDAG( + static std::optional buildFilterActionsDAG( const NodeRawConstPtrs & filter_nodes, const std::unordered_map & node_name_to_input_node_column = {}, bool single_output_condition_node = true); @@ -472,11 +461,26 @@ private: void compileFunctions(size_t min_count_to_compile_expression, const std::unordered_set & lazy_executed_nodes = {}); #endif - static ActionsDAGPtr createActionsForConjunction(NodeRawConstPtrs conjunction, const ColumnsWithTypeAndName & all_inputs); + static std::optional createActionsForConjunction(NodeRawConstPtrs conjunction, const ColumnsWithTypeAndName & all_inputs); void removeUnusedConjunctions(NodeRawConstPtrs rejected_conjunctions, Node * predicate, bool removes_filter); }; +struct ActionsDAG::SplitResult +{ + ActionsDAG first; + ActionsDAG second; + std::unordered_map split_nodes_mapping; +}; + +struct ActionsDAG::ActionsForJOINFilterPushDown +{ + std::optional left_stream_filter_to_push_down; + bool left_stream_filter_removes_filter; + std::optional right_stream_filter_to_push_down; + bool right_stream_filter_removes_filter; +}; + class FindOriginalNodeForOutputName { using NameToNodeIndex = std::unordered_map; diff --git a/src/Interpreters/ActionsVisitor.cpp b/src/Interpreters/ActionsVisitor.cpp index 036b5ba9be0..c2dcdcd34e7 100644 --- a/src/Interpreters/ActionsVisitor.cpp +++ b/src/Interpreters/ActionsVisitor.cpp @@ -1022,7 +1022,7 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data & dag.project(args); auto index_hint = std::make_shared(); - index_hint->setActions(std::make_unique(std::move(dag))); + index_hint->setActions(std::move(dag)); // Arguments are removed. We add function instead of constant column to avoid constant folding. data.addFunction(std::make_unique(index_hint), {}, column_name); @@ -1285,7 +1285,7 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data & lambda_dag.removeUnusedActions(Names(1, result_name)); auto lambda_actions = std::make_shared( - std::make_unique(std::move(lambda_dag)), + std::move(lambda_dag), ExpressionActionsSettings::fromContext(data.getContext(), CompileExpressions::yes)); DataTypePtr result_type = lambda_actions->getSampleBlock().getByName(result_name).type; diff --git a/src/Interpreters/ExpressionActions.cpp b/src/Interpreters/ExpressionActions.cpp index 2eca31fc75e..399f4f2ff4f 100644 --- a/src/Interpreters/ExpressionActions.cpp +++ b/src/Interpreters/ExpressionActions.cpp @@ -49,14 +49,13 @@ namespace ErrorCodes static std::unordered_set processShortCircuitFunctions(const ActionsDAG & actions_dag, ShortCircuitFunctionEvaluation short_circuit_function_evaluation); -ExpressionActions::ExpressionActions(ActionsDAGPtr actions_dag_, const ExpressionActionsSettings & settings_, bool project_inputs_) - : project_inputs(project_inputs_) +ExpressionActions::ExpressionActions(ActionsDAG actions_dag_, const ExpressionActionsSettings & settings_, bool project_inputs_) + : actions_dag(std::move(actions_dag_)) + , project_inputs(project_inputs_) , settings(settings_) { - actions_dag = ActionsDAG::clone(actions_dag_); - /// It's important to determine lazy executed nodes before compiling expressions. 
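With `buildFilterActionsDAG` above now returning `std::optional<ActionsDAG>`, "no filter nodes" is signalled by an empty optional rather than a null pointer, and callers test the result before wrapping it. A sketch of that pattern, modelled on the call sites later in this patch; the assumption that `query_info.filter_actions_dag` holds a `shared_ptr` to a const DAG comes from those call sites, not from this hunk.

    // Sketch: attach the combined filter DAG only when at least one filter node was collected.
    if (auto filter_dag = ActionsDAG::buildFilterActionsDAG(added_filter_nodes.nodes))
        query_info.filter_actions_dag = std::make_shared<const ActionsDAG>(std::move(*filter_dag));
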
- std::unordered_set lazy_executed_nodes = processShortCircuitFunctions(*actions_dag, settings.short_circuit_function_evaluation); + std::unordered_set lazy_executed_nodes = processShortCircuitFunctions(actions_dag, settings.short_circuit_function_evaluation); #if USE_EMBEDDED_COMPILER if (settings.can_compile_expressions && settings.compile_expressions == CompileExpressions::yes) @@ -68,7 +67,7 @@ ExpressionActions::ExpressionActions(ActionsDAGPtr actions_dag_, const Expressio if (settings.max_temporary_columns && num_columns > settings.max_temporary_columns) throw Exception(ErrorCodes::TOO_MANY_TEMPORARY_COLUMNS, "Too many temporary columns: {}. Maximum: {}", - actions_dag->dumpNames(), settings.max_temporary_columns); + actions_dag.dumpNames(), settings.max_temporary_columns); } ExpressionActionsPtr ExpressionActions::clone() const @@ -76,12 +75,12 @@ ExpressionActionsPtr ExpressionActions::clone() const auto copy = std::make_shared(ExpressionActions()); std::unordered_map copy_map; - copy->actions_dag = ActionsDAG::clone(actions_dag.get(), copy_map); + copy->actions_dag = std::move(*ActionsDAG::clone(&actions_dag, copy_map)); copy->actions = actions; for (auto & action : copy->actions) action.node = copy_map[action.node]; - for (const auto * input : copy->actions_dag->getInputs()) + for (const auto * input : copy->actions_dag.getInputs()) copy->input_positions.emplace(input->result_name, input_positions.at(input->result_name)); copy->num_columns = num_columns; @@ -357,8 +356,8 @@ void ExpressionActions::linearizeActions(const std::unordered_setgetOutputs(); - const auto & inputs = actions_dag->getInputs(); + const auto & outputs = actions_dag.getOutputs(); + const auto & inputs = actions_dag.getInputs(); auto reverse_info = getActionsDAGReverseInfo(nodes, outputs); std::vector data; diff --git a/src/Interpreters/ExpressionActions.h b/src/Interpreters/ExpressionActions.h index 63ea989bd5e..6ff39ee07f7 100644 --- a/src/Interpreters/ExpressionActions.h +++ b/src/Interpreters/ExpressionActions.h @@ -70,7 +70,7 @@ public: using NameToInputMap = std::unordered_map>; private: - ActionsDAGPtr actions_dag; + ActionsDAG actions_dag; Actions actions; size_t num_columns = 0; @@ -84,13 +84,13 @@ private: ExpressionActionsSettings settings; public: - explicit ExpressionActions(ActionsDAGPtr actions_dag_, const ExpressionActionsSettings & settings_ = {}, bool project_inputs_ = false); + explicit ExpressionActions(ActionsDAG actions_dag_, const ExpressionActionsSettings & settings_ = {}, bool project_inputs_ = false); ExpressionActions(ExpressionActions &&) = default; ExpressionActions & operator=(ExpressionActions &&) = default; const Actions & getActions() const { return actions; } - const std::list & getNodes() const { return actions_dag->getNodes(); } - const ActionsDAG & getActionsDAG() const { return *actions_dag; } + const std::list & getNodes() const { return actions_dag.getNodes(); } + const ActionsDAG & getActionsDAG() const { return actions_dag; } const ColumnNumbers & getResultPositions() const { return result_positions; } const ExpressionActionsSettings & getSettings() const { return settings; } diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index 6b49365b492..068b6f290fa 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -928,7 +928,7 @@ JoinPtr SelectQueryExpressionAnalyzer::appendJoin( { const ColumnsWithTypeAndName & left_sample_columns = chain.getLastStep().getResultColumns(); - 
ActionsDAGPtr converting_actions; + std::optional converting_actions; JoinPtr join = makeJoin(*syntax->ast_join, left_sample_columns, converting_actions); if (converting_actions) @@ -1039,7 +1039,7 @@ static std::unique_ptr buildJoinedPlan( /// Actions which need to be calculated on joined block. auto joined_block_actions = analyzed_join.createJoinedBlockActions(context); NamesWithAliases required_columns_with_aliases = analyzed_join.getRequiredColumns( - Block(joined_block_actions->getResultColumns()), joined_block_actions->getRequiredColumns().getNames()); + Block(joined_block_actions.getResultColumns()), joined_block_actions.getRequiredColumns().getNames()); Names original_right_column_names; for (auto & pr : required_columns_with_aliases) @@ -1060,17 +1060,17 @@ static std::unique_ptr buildJoinedPlan( interpreter->buildQueryPlan(*joined_plan); { Block original_right_columns = interpreter->getSampleBlock(); - auto rename_dag = std::make_unique(original_right_columns.getColumnsWithTypeAndName()); + ActionsDAG rename_dag(original_right_columns.getColumnsWithTypeAndName()); for (const auto & name_with_alias : required_columns_with_aliases) { if (name_with_alias.first != name_with_alias.second && original_right_columns.has(name_with_alias.first)) { auto pos = original_right_columns.getPositionByName(name_with_alias.first); - const auto & alias = rename_dag->addAlias(*rename_dag->getInputs()[pos], name_with_alias.second); - rename_dag->getOutputs()[pos] = &alias; + const auto & alias = rename_dag.addAlias(*rename_dag.getInputs()[pos], name_with_alias.second); + rename_dag.getOutputs()[pos] = &alias; } } - rename_dag->appendInputsForUnusedColumns(joined_plan->getCurrentDataStream().header); + rename_dag.appendInputsForUnusedColumns(joined_plan->getCurrentDataStream().header); auto rename_step = std::make_unique(joined_plan->getCurrentDataStream(), std::move(rename_dag)); rename_step->setStepDescription("Rename joined columns"); joined_plan->addStep(std::move(rename_step)); @@ -1130,14 +1130,14 @@ std::shared_ptr tryKeyValueJoin(std::shared_ptr a JoinPtr SelectQueryExpressionAnalyzer::makeJoin( const ASTTablesInSelectQueryElement & join_element, const ColumnsWithTypeAndName & left_columns, - ActionsDAGPtr & left_convert_actions) + std::optional & left_convert_actions) { /// Two JOINs are not supported with the same subquery, but different USINGs. 
if (joined_plan) throw Exception(ErrorCodes::LOGICAL_ERROR, "Table join was already created for query"); - ActionsDAGPtr right_convert_actions = nullptr; + std::optional right_convert_actions; const auto & analyzed_join = syntax->analyzed_join; @@ -1145,7 +1145,7 @@ JoinPtr SelectQueryExpressionAnalyzer::makeJoin( { auto joined_block_actions = analyzed_join->createJoinedBlockActions(getContext()); NamesWithAliases required_columns_with_aliases = analyzed_join->getRequiredColumns( - Block(joined_block_actions->getResultColumns()), joined_block_actions->getRequiredColumns().getNames()); + Block(joined_block_actions.getResultColumns()), joined_block_actions.getRequiredColumns().getNames()); Names original_right_column_names; for (auto & pr : required_columns_with_aliases) @@ -1162,7 +1162,7 @@ JoinPtr SelectQueryExpressionAnalyzer::makeJoin( std::tie(left_convert_actions, right_convert_actions) = analyzed_join->createConvertingActions(left_columns, right_columns); if (right_convert_actions) { - auto converting_step = std::make_unique(joined_plan->getCurrentDataStream(), right_convert_actions); + auto converting_step = std::make_unique(joined_plan->getCurrentDataStream(), std::move(*right_convert_actions)); converting_step->setStepDescription("Convert joined columns"); joined_plan->addStep(std::move(converting_step)); } @@ -1354,8 +1354,8 @@ bool SelectQueryExpressionAnalyzer::appendGroupBy(ExpressionActionsChain & chain { for (auto & child : asts) { - auto actions_dag = std::make_unique(columns_after_join); - getRootActions(child, only_types, *actions_dag); + ActionsDAG actions_dag(columns_after_join); + getRootActions(child, only_types, actions_dag); group_by_elements_actions.emplace_back( std::make_shared(std::move(actions_dag), ExpressionActionsSettings::fromContext(getContext(), CompileExpressions::yes))); } @@ -1471,7 +1471,7 @@ void SelectQueryExpressionAnalyzer::appendGroupByModifiers(ActionsDAG & before_a ExpressionActionsChain::Step & step = chain.addStep(before_aggregation.getNamesAndTypesList()); step.required_output = std::move(required_output); - step.actions()->dag = std::move(*ActionsDAG::makeConvertingActions(source_columns, result_columns, ActionsDAG::MatchColumnsMode::Position)); + step.actions()->dag = ActionsDAG::makeConvertingActions(source_columns, result_columns, ActionsDAG::MatchColumnsMode::Position); } void SelectQueryExpressionAnalyzer::appendSelectSkipWindowExpressions(ExpressionActionsChain::Step & step, ASTPtr const & node) @@ -1607,8 +1607,8 @@ ActionsAndProjectInputsFlagPtr SelectQueryExpressionAnalyzer::appendOrderBy(Expr { for (const auto & child : select_query->orderBy()->children) { - auto actions_dag = std::make_unique(columns_after_join); - getRootActions(child, only_types, *actions_dag); + ActionsDAG actions_dag(columns_after_join); + getRootActions(child, only_types, actions_dag); order_by_elements_actions.emplace_back( std::make_shared(std::move(actions_dag), ExpressionActionsSettings::fromContext(getContext(), CompileExpressions::yes))); } @@ -1737,7 +1737,7 @@ void ExpressionAnalyzer::appendExpression(ExpressionActionsChain & chain, const step.addRequiredOutput(expr->getColumnName()); } -ActionsDAGPtr ExpressionAnalyzer::getActionsDAG(bool add_aliases, bool remove_unused_result) +ActionsDAG ExpressionAnalyzer::getActionsDAG(bool add_aliases, bool remove_unused_result) { ActionsDAG actions_dag(aggregated_columns); NamesWithAliases result_columns; @@ -1789,7 +1789,7 @@ ActionsDAGPtr ExpressionAnalyzer::getActionsDAG(bool add_aliases, bool remove_un 
actions_dag.removeUnusedActions(name_set); } - return std::make_unique(std::move(actions_dag)); + return actions_dag; } ExpressionActionsPtr ExpressionAnalyzer::getActions(bool add_aliases, bool remove_unused_result, CompileExpressions compile_expressions) @@ -1798,10 +1798,10 @@ ExpressionActionsPtr ExpressionAnalyzer::getActions(bool add_aliases, bool remov getActionsDAG(add_aliases, remove_unused_result), ExpressionActionsSettings::fromContext(getContext(), compile_expressions), add_aliases && remove_unused_result); } -ActionsDAGPtr ExpressionAnalyzer::getConstActionsDAG(const ColumnsWithTypeAndName & constant_inputs) +ActionsDAG ExpressionAnalyzer::getConstActionsDAG(const ColumnsWithTypeAndName & constant_inputs) { - auto actions = std::make_unique(constant_inputs); - getRootActions(query, true /* no_makeset_for_subqueries */, *actions, true /* only_consts */); + ActionsDAG actions(constant_inputs); + getRootActions(query, true /* no_makeset_for_subqueries */, actions, true /* only_consts */); return actions; } @@ -1879,8 +1879,7 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( if (prewhere_dag_and_flags) { - auto dag = std::make_unique(std::move(prewhere_dag_and_flags->dag)); - prewhere_info = std::make_shared(std::move(dag), query.prewhere()->getColumnName()); + prewhere_info = std::make_shared(std::move(prewhere_dag_and_flags->dag), query.prewhere()->getColumnName()); prewhere_dag_and_flags.reset(); } @@ -1944,7 +1943,7 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( Block before_prewhere_sample = source_header; if (sanitizeBlock(before_prewhere_sample)) { - auto dag = ActionsDAG::clone(&prewhere_dag_and_flags->dag); + ActionsDAG dag = std::move(*ActionsDAG::clone(&prewhere_dag_and_flags->dag)); ExpressionActions( std::move(dag), ExpressionActionsSettings::fromSettings(context->getSettingsRef())).execute(before_prewhere_sample); @@ -1980,7 +1979,7 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( if (sanitizeBlock(before_where_sample)) { ExpressionActions( - ActionsDAG::clone(&before_where->dag), + std::move(*ActionsDAG::clone(&before_where->dag)), ExpressionActionsSettings::fromSettings(context->getSettingsRef())).execute(before_where_sample); auto & column_elem @@ -2054,7 +2053,7 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( auto & step = chain.lastStep(query_analyzer.aggregated_columns); auto & actions = step.actions()->dag; - actions = std::move(*ActionsDAG::merge(std::move(actions), std::move(*converting))); + actions = ActionsDAG::merge(std::move(actions), std::move(converting)); } } @@ -2235,7 +2234,7 @@ void ExpressionAnalysisResult::checkActions() const /// Check that PREWHERE doesn't contain unusual actions. Unusual actions are that can change number of rows. if (hasPrewhere()) { - auto check_actions = [](const ActionsDAGPtr & actions) + auto check_actions = [](const std::optional & actions) { if (actions) for (const auto & node : actions->getNodes()) diff --git a/src/Interpreters/ExpressionAnalyzer.h b/src/Interpreters/ExpressionAnalyzer.h index e44a5891e77..737d36eb504 100644 --- a/src/Interpreters/ExpressionAnalyzer.h +++ b/src/Interpreters/ExpressionAnalyzer.h @@ -117,12 +117,12 @@ public: /// If add_aliases, only the calculated values in the desired order and add aliases. /// If also remove_unused_result, than only aliases remain in the output block. /// Otherwise, only temporary columns will be deleted from the block. 
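For the `getActionsDAG` declaration that follows, a short sketch of how value semantics simplify composition, modelled on the INTERPOLATE handling later in this patch: the analyzer's DAG and a converting DAG are merged into one, with `merge` consuming both arguments. `analyzer` and `result_columns` stand in for the surrounding context and are not defined here.

    // Sketch: compose the analyzer's actions with a positional conversion.
    ActionsDAG actions = analyzer.getActionsDAG(/*add_aliases=*/ true);
    ActionsDAG converting = ActionsDAG::makeConvertingActions(
        actions.getResultColumns(), result_columns,
        ActionsDAG::MatchColumnsMode::Position, /*ignore_constant_values=*/ true);
    // Equivalent to applying `actions` and then `converting` in sequence.
    ActionsDAG merged = ActionsDAG::merge(std::move(actions), std::move(converting));
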
- ActionsDAGPtr getActionsDAG(bool add_aliases, bool remove_unused_result = true); + ActionsDAG getActionsDAG(bool add_aliases, bool remove_unused_result = true); ExpressionActionsPtr getActions(bool add_aliases, bool remove_unused_result = true, CompileExpressions compile_expressions = CompileExpressions::no); /// Get actions to evaluate a constant expression. The function adds constants and applies functions that depend only on constants. /// Does not execute subqueries. - ActionsDAGPtr getConstActionsDAG(const ColumnsWithTypeAndName & constant_inputs = {}); + ActionsDAG getConstActionsDAG(const ColumnsWithTypeAndName & constant_inputs = {}); ExpressionActionsPtr getConstActions(const ColumnsWithTypeAndName & constant_inputs = {}); /** Sets that require a subquery to be create. @@ -367,7 +367,7 @@ private: JoinPtr makeJoin( const ASTTablesInSelectQueryElement & join_element, const ColumnsWithTypeAndName & left_columns, - ActionsDAGPtr & left_convert_actions); + std::optional & left_convert_actions); const ASTSelectQuery * getAggregatingQuery() const; diff --git a/src/Interpreters/GlobalSubqueriesVisitor.h b/src/Interpreters/GlobalSubqueriesVisitor.h index 64b6eb5dce9..fcf0d591918 100644 --- a/src/Interpreters/GlobalSubqueriesVisitor.h +++ b/src/Interpreters/GlobalSubqueriesVisitor.h @@ -295,7 +295,7 @@ private: { auto joined_block_actions = data.table_join->createJoinedBlockActions(data.getContext()); NamesWithAliases required_columns_with_aliases = data.table_join->getRequiredColumns( - Block(joined_block_actions->getResultColumns()), joined_block_actions->getRequiredColumns().getNames()); + Block(joined_block_actions.getResultColumns()), joined_block_actions.getRequiredColumns().getNames()); for (auto & pr : required_columns_with_aliases) required_columns.push_back(pr.first); diff --git a/src/Interpreters/IInterpreterUnionOrSelectQuery.cpp b/src/Interpreters/IInterpreterUnionOrSelectQuery.cpp index fed29b410db..288d06d2220 100644 --- a/src/Interpreters/IInterpreterUnionOrSelectQuery.cpp +++ b/src/Interpreters/IInterpreterUnionOrSelectQuery.cpp @@ -99,16 +99,16 @@ static ASTPtr parseAdditionalPostFilter(const Context & context) "additional filter", settings.max_query_size, settings.max_parser_depth, settings.max_parser_backtracks); } -static ActionsDAGPtr makeAdditionalPostFilter(ASTPtr & ast, ContextPtr context, const Block & header) +static ActionsDAG makeAdditionalPostFilter(ASTPtr & ast, ContextPtr context, const Block & header) { auto syntax_result = TreeRewriter(context).analyze(ast, header.getNamesAndTypesList()); String result_column_name = ast->getColumnName(); auto dag = ExpressionAnalyzer(ast, syntax_result, context).getActionsDAG(false, false); - const ActionsDAG::Node * result_node = &dag->findInOutputs(result_column_name); - auto & outputs = dag->getOutputs(); + const ActionsDAG::Node * result_node = &dag.findInOutputs(result_column_name); + auto & outputs = dag.getOutputs(); outputs.clear(); - outputs.reserve(dag->getInputs().size() + 1); - for (const auto * node : dag->getInputs()) + outputs.reserve(dag.getInputs().size() + 1); + for (const auto * node : dag.getInputs()) outputs.push_back(node); outputs.push_back(result_node); @@ -126,7 +126,7 @@ void IInterpreterUnionOrSelectQuery::addAdditionalPostFilter(QueryPlan & plan) c return; auto dag = makeAdditionalPostFilter(ast, context, plan.getCurrentDataStream().header); - std::string filter_name = dag->getOutputs().back()->result_name; + std::string filter_name = dag.getOutputs().back()->result_name; auto filter_step = 
std::make_unique( plan.getCurrentDataStream(), std::move(dag), std::move(filter_name), true); filter_step->setStepDescription("Additional result filter"); diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 09a7e440f31..cde6e305005 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -175,7 +175,7 @@ FilterDAGInfoPtr generateFilterActions( /// Using separate expression analyzer to prevent any possible alias injection auto syntax_result = TreeRewriter(context).analyzeSelect(query_ast, TreeRewriterResult({}, storage, storage_snapshot)); SelectQueryExpressionAnalyzer analyzer(query_ast, syntax_result, context, metadata_snapshot, {}, false, {}, prepared_sets); - filter_info->actions = std::make_unique(std::move(analyzer.simpleSelectActions()->dag)); + filter_info->actions = std::move(analyzer.simpleSelectActions()->dag); filter_info->column_name = expr_list->children.at(0)->getColumnName(); filter_info->actions->removeUnusedActions(NameSet{filter_info->column_name}); @@ -938,7 +938,8 @@ bool InterpreterSelectQuery::adjustParallelReplicasAfterAnalysis() } } - query_info_copy.filter_actions_dag = ActionsDAG::buildFilterActionsDAG(added_filter_nodes.nodes); + if (auto filter_actions_dag = ActionsDAG::buildFilterActionsDAG(added_filter_nodes.nodes)) + query_info_copy.filter_actions_dag = std::make_shared(std::move(*filter_actions_dag)); UInt64 rows_to_read = storage_merge_tree->estimateNumberOfRowsToRead(context, storage_snapshot, query_info_copy); /// Note that we treat an estimation of 0 rows as a real estimation size_t number_of_replicas_to_use = rows_to_read / settings.parallel_replicas_min_number_of_rows_per_replica; @@ -973,7 +974,7 @@ void InterpreterSelectQuery::buildQueryPlan(QueryPlan & query_plan) ActionsDAG::MatchColumnsMode::Name, true); - auto converting = std::make_unique(query_plan.getCurrentDataStream(), convert_actions_dag); + auto converting = std::make_unique(query_plan.getCurrentDataStream(), std::move(convert_actions_dag)); query_plan.addStep(std::move(converting)); } @@ -1297,10 +1298,10 @@ static InterpolateDescriptionPtr getInterpolateDescription( auto syntax_result = TreeRewriter(context).analyze(exprs, source_columns); ExpressionAnalyzer analyzer(exprs, syntax_result, context); - ActionsDAGPtr actions = analyzer.getActionsDAG(true); - ActionsDAGPtr conv_dag = ActionsDAG::makeConvertingActions(actions->getResultColumns(), + ActionsDAG actions = analyzer.getActionsDAG(true); + ActionsDAG conv_dag = ActionsDAG::makeConvertingActions(actions.getResultColumns(), result_columns, ActionsDAG::MatchColumnsMode::Position, true); - ActionsDAGPtr merge_dag = ActionsDAG::merge(std::move(* ActionsDAG::clone(actions)), std::move(*conv_dag)); + ActionsDAG merge_dag = ActionsDAG::merge(std::move(actions), std::move(conv_dag)); interpolate_descr = std::make_shared(std::move(merge_dag), aliases); } @@ -1485,7 +1486,7 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional

( query_plan.getCurrentDataStream(), - expressions.filter_info->actions, + std::move(*ActionsDAG::clone(&*expressions.filter_info->actions)), expressions.filter_info->column_name, expressions.filter_info->do_remove_column); @@ -1499,7 +1500,7 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional

( query_plan.getCurrentDataStream(), - expressions.prewhere_info->row_level_filter, + std::move(*ActionsDAG::clone(&*expressions.prewhere_info->row_level_filter)), expressions.prewhere_info->row_level_column_name, true); @@ -1509,7 +1510,7 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional

( query_plan.getCurrentDataStream(), - expressions.prewhere_info->prewhere_actions, + std::move(*ActionsDAG::clone(&*expressions.prewhere_info->prewhere_actions)), expressions.prewhere_info->prewhere_column_name, expressions.prewhere_info->remove_prewhere_column); @@ -1611,7 +1612,7 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional

( query_plan.getCurrentDataStream(), - expressions.filter_info->actions, + std::move(*ActionsDAG::clone(&*expressions.filter_info->actions)), expressions.filter_info->column_name, expressions.filter_info->do_remove_column); @@ -1623,7 +1624,7 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional

( query_plan.getCurrentDataStream(), - new_filter_info->actions, + std::move(*ActionsDAG::clone(&*new_filter_info->actions)), new_filter_info->column_name, new_filter_info->do_remove_column); @@ -2045,7 +2046,7 @@ void InterpreterSelectQuery::addEmptySourceToQueryPlan(QueryPlan & query_plan, c pipe.addSimpleTransform([&](const Block & header) { return std::make_shared(header, - std::make_shared(ActionsDAG::clone(prewhere_info.row_level_filter)), + std::make_shared(std::move(*ActionsDAG::clone(&*prewhere_info.row_level_filter))), prewhere_info.row_level_column_name, true); }); } @@ -2053,7 +2054,7 @@ void InterpreterSelectQuery::addEmptySourceToQueryPlan(QueryPlan & query_plan, c pipe.addSimpleTransform([&](const Block & header) { return std::make_shared( - header, std::make_shared(ActionsDAG::clone(prewhere_info.prewhere_actions)), + header, std::make_shared(std::move(*ActionsDAG::clone(&*prewhere_info.prewhere_actions))), prewhere_info.prewhere_column_name, prewhere_info.remove_prewhere_column); }); } @@ -2106,7 +2107,7 @@ void InterpreterSelectQuery::applyFiltersToPrewhereInAnalysis(ExpressionAnalysis else { /// Add row level security actions to prewhere. - analysis.prewhere_info->row_level_filter = std::move(analysis.filter_info->actions); + analysis.prewhere_info->row_level_filter = std::move(*analysis.filter_info->actions); analysis.prewhere_info->row_level_column_name = std::move(analysis.filter_info->column_name); analysis.filter_info = nullptr; } @@ -2323,7 +2324,7 @@ std::optional InterpreterSelectQuery::getTrivialCount(UInt64 max_paralle if (!filter_actions_dag) return {}; - return storage->totalRowsByPartitionPredicate(filter_actions_dag, context); + return storage->totalRowsByPartitionPredicate(*filter_actions_dag, context); } } @@ -2573,7 +2574,7 @@ void InterpreterSelectQuery::executeFetchColumns(QueryProcessingStage::Enum proc /// Aliases in table declaration. 
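The hunk below hands the "table aliases" step its own copy of the DAG. The reason for the clone-then-move idiom that recurs throughout this patch, spelled out here under the assumption that `ActionsDAG::clone` keeps returning a `unique_ptr`, as these call sites imply:

    // The step owns its DAG by value, but `alias_actions` (now a std::optional<ActionsDAG>
    // kept on the analysis result) must stay intact for later use, so the call site clones
    // the DAG and moves the copy into the step.
    auto table_aliases = std::make_unique<ExpressionStep>(
        query_plan.getCurrentDataStream(),
        std::move(*ActionsDAG::clone(&*alias_actions)));
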
if (processing_stage == QueryProcessingStage::FetchColumns && alias_actions) { - auto table_aliases = std::make_unique(query_plan.getCurrentDataStream(), alias_actions); + auto table_aliases = std::make_unique(query_plan.getCurrentDataStream(), std::move(*ActionsDAG::clone(&*alias_actions))); table_aliases->setStepDescription("Add table aliases"); query_plan.addStep(std::move(table_aliases)); } @@ -2581,9 +2582,9 @@ void InterpreterSelectQuery::executeFetchColumns(QueryProcessingStage::Enum proc void InterpreterSelectQuery::executeWhere(QueryPlan & query_plan, const ActionsAndProjectInputsFlagPtr & expression, bool remove_filter) { - auto dag = ActionsDAG::clone(&expression->dag); + auto dag = std::move(*ActionsDAG::clone(&expression->dag)); if (expression->project_input) - dag->appendInputsForUnusedColumns(query_plan.getCurrentDataStream().header); + dag.appendInputsForUnusedColumns(query_plan.getCurrentDataStream().header); auto where_step = std::make_unique( query_plan.getCurrentDataStream(), std::move(dag), getSelectQuery().where()->getColumnName(), remove_filter); @@ -2755,9 +2756,9 @@ void InterpreterSelectQuery::executeMergeAggregated(QueryPlan & query_plan, bool void InterpreterSelectQuery::executeHaving(QueryPlan & query_plan, const ActionsAndProjectInputsFlagPtr & expression, bool remove_filter) { - auto dag = ActionsDAG::clone(&expression->dag); + auto dag = std::move(*ActionsDAG::clone(&expression->dag)); if (expression->project_input) - dag->appendInputsForUnusedColumns(query_plan.getCurrentDataStream().header); + dag.appendInputsForUnusedColumns(query_plan.getCurrentDataStream().header); auto having_step = std::make_unique(query_plan.getCurrentDataStream(), std::move(dag), getSelectQuery().having()->getColumnName(), remove_filter); @@ -2770,10 +2771,10 @@ void InterpreterSelectQuery::executeHaving(QueryPlan & query_plan, const Actions void InterpreterSelectQuery::executeTotalsAndHaving( QueryPlan & query_plan, bool has_having, const ActionsAndProjectInputsFlagPtr & expression, bool remove_filter, bool overflow_row, bool final) { - ActionsDAGPtr dag; + std::optional dag; if (expression) { - dag = ActionsDAG::clone(&expression->dag); + dag = std::move(*ActionsDAG::clone(&expression->dag)); if (expression->project_input) dag->appendInputsForUnusedColumns(query_plan.getCurrentDataStream().header); } @@ -2822,9 +2823,9 @@ void InterpreterSelectQuery::executeExpression(QueryPlan & query_plan, const Act if (!expression) return; - auto dag = ActionsDAG::clone(&expression->dag); + ActionsDAG dag = std::move(*ActionsDAG::clone(&expression->dag)); if (expression->project_input) - dag->appendInputsForUnusedColumns(query_plan.getCurrentDataStream().header); + dag.appendInputsForUnusedColumns(query_plan.getCurrentDataStream().header); auto expression_step = std::make_unique(query_plan.getCurrentDataStream(), std::move(dag)); diff --git a/src/Interpreters/InterpreterSelectQuery.h b/src/Interpreters/InterpreterSelectQuery.h index d4ed19d45ea..ed6dd8af3b2 100644 --- a/src/Interpreters/InterpreterSelectQuery.h +++ b/src/Interpreters/InterpreterSelectQuery.h @@ -240,7 +240,7 @@ private: Block source_header; /// Actions to calculate ALIAS if required. 
- ActionsDAGPtr alias_actions; + std::optional alias_actions; /// The subquery interpreter, if the subquery std::unique_ptr interpreter_subquery; diff --git a/src/Interpreters/MutationsInterpreter.cpp b/src/Interpreters/MutationsInterpreter.cpp index 704c5ce7d8b..2372d26e83f 100644 --- a/src/Interpreters/MutationsInterpreter.cpp +++ b/src/Interpreters/MutationsInterpreter.cpp @@ -1197,7 +1197,7 @@ void MutationsInterpreter::Source::read( const auto & names = first_stage.filter_column_names; size_t num_filters = names.size(); - ActionsDAGPtr filter; + std::optional filter; if (!first_stage.filter_column_names.empty()) { ActionsDAG::NodeRawConstPtrs nodes(num_filters); @@ -1278,19 +1278,19 @@ QueryPipelineBuilder MutationsInterpreter::addStreamsForLaterStages(const std::v if (i < stage.filter_column_names.size()) { - auto dag = ActionsDAG::clone(&step->actions()->dag); + auto dag = std::move(*ActionsDAG::clone(&step->actions()->dag)); if (step->actions()->project_input) - dag->appendInputsForUnusedColumns(plan.getCurrentDataStream().header); + dag.appendInputsForUnusedColumns(plan.getCurrentDataStream().header); /// Execute DELETEs. - plan.addStep(std::make_unique(plan.getCurrentDataStream(), dag, stage.filter_column_names[i], false)); + plan.addStep(std::make_unique(plan.getCurrentDataStream(), std::move(dag), stage.filter_column_names[i], false)); } else { - auto dag = ActionsDAG::clone(&step->actions()->dag); + auto dag = std::move(*ActionsDAG::clone(&step->actions()->dag)); if (step->actions()->project_input) - dag->appendInputsForUnusedColumns(plan.getCurrentDataStream().header); + dag.appendInputsForUnusedColumns(plan.getCurrentDataStream().header); /// Execute UPDATE or final projection. - plan.addStep(std::make_unique(plan.getCurrentDataStream(), dag)); + plan.addStep(std::make_unique(plan.getCurrentDataStream(), std::move(dag))); } } diff --git a/src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp b/src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp index 4821d607d0e..c1d7acf0775 100644 --- a/src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp +++ b/src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp @@ -182,7 +182,7 @@ static NamesAndTypesList getNames(const ASTFunction & expr, ContextPtr context, ASTPtr temp_ast = expr.clone(); auto syntax = TreeRewriter(context).analyze(temp_ast, columns); - auto required_columns = ExpressionAnalyzer(temp_ast, syntax, context).getActionsDAG(false)->getRequiredColumns(); + auto required_columns = ExpressionAnalyzer(temp_ast, syntax, context).getActionsDAG(false).getRequiredColumns(); return required_columns; } diff --git a/src/Interpreters/TableJoin.cpp b/src/Interpreters/TableJoin.cpp index baf3a743f40..c8c926db13c 100644 --- a/src/Interpreters/TableJoin.cpp +++ b/src/Interpreters/TableJoin.cpp @@ -462,19 +462,19 @@ static void makeColumnNameUnique(const ColumnsWithTypeAndName & source_columns, } } -static ActionsDAGPtr createWrapWithTupleActions( +static std::optional createWrapWithTupleActions( const ColumnsWithTypeAndName & source_columns, std::unordered_set && column_names_to_wrap, NameToNameMap & new_names) { if (column_names_to_wrap.empty()) - return nullptr; + return {}; - auto actions_dag = std::make_unique(source_columns); + ActionsDAG actions_dag(source_columns); FunctionOverloadResolverPtr func_builder = std::make_unique(std::make_shared()); - for (const auto * input_node : actions_dag->getInputs()) + for (const auto * input_node : actions_dag.getInputs()) { const auto & column_name = input_node->result_name; auto it = 
column_names_to_wrap.find(column_name); @@ -485,9 +485,9 @@ static ActionsDAGPtr createWrapWithTupleActions( String node_name = "__wrapNullsafe(" + column_name + ")"; makeColumnNameUnique(source_columns, node_name); - const auto & dst_node = actions_dag->addFunction(func_builder, {input_node}, node_name); + const auto & dst_node = actions_dag.addFunction(func_builder, {input_node}, node_name); new_names[column_name] = dst_node.result_name; - actions_dag->addOrReplaceInOutputs(dst_node); + actions_dag.addOrReplaceInOutputs(dst_node); } if (!column_names_to_wrap.empty()) @@ -537,21 +537,23 @@ std::pair TableJoin::getKeysForNullSafeComparion(const Columns return {left_keys_to_wrap, right_keys_to_wrap}; } -static void mergeDags(ActionsDAGPtr & result_dag, ActionsDAGPtr && new_dag) +static void mergeDags(std::optional & result_dag, std::optional && new_dag) { + if (!new_dag) + return; if (result_dag) result_dag->mergeInplace(std::move(*new_dag)); else result_dag = std::move(new_dag); } -std::pair +std::pair, std::optional> TableJoin::createConvertingActions( const ColumnsWithTypeAndName & left_sample_columns, const ColumnsWithTypeAndName & right_sample_columns) { - ActionsDAGPtr left_dag = nullptr; - ActionsDAGPtr right_dag = nullptr; + std::optional left_dag; + std::optional right_dag; /** If the types are not equal, we need to convert them to a common type. * Example: * SELECT * FROM t1 JOIN t2 ON t1.a = t2.b @@ -693,7 +695,7 @@ void TableJoin::inferJoinKeyCommonType(const LeftNamesAndTypes & left, const Rig } } -static ActionsDAGPtr changeKeyTypes(const ColumnsWithTypeAndName & cols_src, +static std::optional changeKeyTypes(const ColumnsWithTypeAndName & cols_src, const TableJoin::NameToTypeMap & type_mapping, bool add_new_cols, NameToNameMap & key_column_rename) @@ -710,7 +712,7 @@ static ActionsDAGPtr changeKeyTypes(const ColumnsWithTypeAndName & cols_src, } } if (!has_some_to_do) - return nullptr; + return {}; return ActionsDAG::makeConvertingActions( /* source= */ cols_src, @@ -721,7 +723,7 @@ static ActionsDAGPtr changeKeyTypes(const ColumnsWithTypeAndName & cols_src, /* new_names= */ &key_column_rename); } -static ActionsDAGPtr changeTypesToNullable( +static std::optional changeTypesToNullable( const ColumnsWithTypeAndName & cols_src, const NameSet & exception_cols) { @@ -737,7 +739,7 @@ static ActionsDAGPtr changeTypesToNullable( } if (!has_some_to_do) - return nullptr; + return {}; return ActionsDAG::makeConvertingActions( /* source= */ cols_src, @@ -748,29 +750,29 @@ static ActionsDAGPtr changeTypesToNullable( /* new_names= */ nullptr); } -ActionsDAGPtr TableJoin::applyKeyConvertToTable( +std::optional TableJoin::applyKeyConvertToTable( const ColumnsWithTypeAndName & cols_src, const NameToTypeMap & type_mapping, JoinTableSide table_side, NameToNameMap & key_column_rename) { if (type_mapping.empty()) - return nullptr; + return {}; /// Create DAG to convert key columns - ActionsDAGPtr convert_dag = changeKeyTypes(cols_src, type_mapping, !hasUsing(), key_column_rename); + auto convert_dag = changeKeyTypes(cols_src, type_mapping, !hasUsing(), key_column_rename); applyRename(table_side, key_column_rename); return convert_dag; } -ActionsDAGPtr TableJoin::applyNullsafeWrapper( +std::optional TableJoin::applyNullsafeWrapper( const ColumnsWithTypeAndName & cols_src, const NameSet & columns_for_nullsafe_comparison, JoinTableSide table_side, NameToNameMap & key_column_rename) { if (columns_for_nullsafe_comparison.empty()) - return nullptr; + return {}; std::unordered_set column_names_to_wrap; 
for (const auto & name : columns_for_nullsafe_comparison) @@ -784,7 +786,7 @@ ActionsDAGPtr TableJoin::applyNullsafeWrapper( } /// Create DAG to wrap keys with tuple for null-safe comparison - ActionsDAGPtr null_safe_wrap_dag = createWrapWithTupleActions(cols_src, std::move(column_names_to_wrap), key_column_rename); + auto null_safe_wrap_dag = createWrapWithTupleActions(cols_src, std::move(column_names_to_wrap), key_column_rename); for (auto & clause : clauses) { for (size_t i : clause.nullsafe_compare_key_indexes) @@ -799,7 +801,7 @@ ActionsDAGPtr TableJoin::applyNullsafeWrapper( return null_safe_wrap_dag; } -ActionsDAGPtr TableJoin::applyJoinUseNullsConversion( +std::optional TableJoin::applyJoinUseNullsConversion( const ColumnsWithTypeAndName & cols_src, const NameToNameMap & key_column_rename) { @@ -809,8 +811,7 @@ ActionsDAGPtr TableJoin::applyJoinUseNullsConversion( exclude_columns.insert(it.second); /// Create DAG to make columns nullable if needed - ActionsDAGPtr add_nullable_dag = changeTypesToNullable(cols_src, exclude_columns); - return add_nullable_dag; + return changeTypesToNullable(cols_src, exclude_columns); } void TableJoin::setStorageJoin(std::shared_ptr storage) @@ -957,7 +958,7 @@ bool TableJoin::allowParallelHashJoin() const return true; } -ActionsDAGPtr TableJoin::createJoinedBlockActions(ContextPtr context) const +ActionsDAG TableJoin::createJoinedBlockActions(ContextPtr context) const { ASTPtr expression_list = rightKeysList(); auto syntax_result = TreeRewriter(context).analyze(expression_list, columnsFromJoinedTable()); diff --git a/src/Interpreters/TableJoin.h b/src/Interpreters/TableJoin.h index 8e83233e54c..a057d46b94d 100644 --- a/src/Interpreters/TableJoin.h +++ b/src/Interpreters/TableJoin.h @@ -202,19 +202,19 @@ private: Names requiredJoinedNames() const; /// Create converting actions and change key column names if required - ActionsDAGPtr applyKeyConvertToTable( + std::optional applyKeyConvertToTable( const ColumnsWithTypeAndName & cols_src, const NameToTypeMap & type_mapping, JoinTableSide table_side, NameToNameMap & key_column_rename); - ActionsDAGPtr applyNullsafeWrapper( + std::optional applyNullsafeWrapper( const ColumnsWithTypeAndName & cols_src, const NameSet & columns_for_nullsafe_comparison, JoinTableSide table_side, NameToNameMap & key_column_rename); - ActionsDAGPtr applyJoinUseNullsConversion( + std::optional applyJoinUseNullsConversion( const ColumnsWithTypeAndName & cols_src, const NameToNameMap & key_column_rename); @@ -264,7 +264,7 @@ public: TemporaryDataOnDiskScopePtr getTempDataOnDisk() { return tmp_data; } - ActionsDAGPtr createJoinedBlockActions(ContextPtr context) const; + ActionsDAG createJoinedBlockActions(ContextPtr context) const; const std::vector & getEnabledJoinAlgorithms() const { return join_algorithm; } @@ -379,7 +379,7 @@ public: /// Calculate converting actions, rename key columns in required /// For `USING` join we will convert key columns inplace and affect into types in the result table /// For `JOIN ON` we will create new columns with converted keys to join by. 
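For the `createConvertingActions` signature change that follows, a sketch of how a caller consumes the pair of optionals, modelled on `makeJoin` earlier in this patch; an empty optional now plays the role the null `ActionsDAGPtr` used to.

    // Sketch: apply the per-side key conversion only when one is actually needed.
    auto [left_convert_actions, right_convert_actions] =
        analyzed_join->createConvertingActions(left_columns, right_columns);

    if (right_convert_actions)
    {
        auto converting_step = std::make_unique<ExpressionStep>(
            joined_plan->getCurrentDataStream(), std::move(*right_convert_actions));
        converting_step->setStepDescription("Convert joined columns");
        joined_plan->addStep(std::move(converting_step));
    }
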
- std::pair + std::pair, std::optional> createConvertingActions( const ColumnsWithTypeAndName & left_sample_columns, const ColumnsWithTypeAndName & right_sample_columns); diff --git a/src/Interpreters/addMissingDefaults.cpp b/src/Interpreters/addMissingDefaults.cpp index 929999c8c37..27d79e86622 100644 --- a/src/Interpreters/addMissingDefaults.cpp +++ b/src/Interpreters/addMissingDefaults.cpp @@ -14,15 +14,15 @@ namespace DB { -ActionsDAGPtr addMissingDefaults( +ActionsDAG addMissingDefaults( const Block & header, const NamesAndTypesList & required_columns, const ColumnsDescription & columns, ContextPtr context, bool null_as_default) { - auto actions = std::make_unique(header.getColumnsWithTypeAndName()); - auto & index = actions->getOutputs(); + ActionsDAG actions(header.getColumnsWithTypeAndName()); + auto & index = actions.getOutputs(); /// For missing columns of nested structure, you need to create not a column of empty arrays, but a column of arrays of correct lengths. /// First, remember the offset columns for all arrays in the block. @@ -40,7 +40,7 @@ ActionsDAGPtr addMissingDefaults( if (group.empty()) group.push_back(nullptr); - group.push_back(actions->getInputs()[i]); + group.push_back(actions.getInputs()[i]); } } @@ -62,11 +62,11 @@ ActionsDAGPtr addMissingDefaults( { const auto & nested_type = array_type->getNestedType(); ColumnPtr nested_column = nested_type->createColumnConstWithDefaultValue(0); - const auto & constant = actions->addColumn({nested_column, nested_type, column.name}); + const auto & constant = actions.addColumn({nested_column, nested_type, column.name}); auto & group = nested_groups[offsets_name]; group[0] = &constant; - index.push_back(&actions->addFunction(func_builder_replicate, group, constant.result_name)); + index.push_back(&actions.addFunction(func_builder_replicate, group, constant.result_name)); continue; } @@ -75,17 +75,17 @@ ActionsDAGPtr addMissingDefaults( * it can be full (or the interpreter may decide that it is constant everywhere). */ auto new_column = column.type->createColumnConstWithDefaultValue(0); - const auto * col = &actions->addColumn({new_column, column.type, column.name}); - index.push_back(&actions->materializeNode(*col)); + const auto * col = &actions.addColumn({new_column, column.type, column.name}); + index.push_back(&actions.materializeNode(*col)); } /// Computes explicitly specified values by default and materialized columns. - if (auto dag = evaluateMissingDefaults(actions->getResultColumns(), required_columns, columns, context, true, null_as_default)) - actions = ActionsDAG::merge(std::move(*actions), std::move(*dag)); + if (auto dag = evaluateMissingDefaults(actions.getResultColumns(), required_columns, columns, context, true, null_as_default)) + actions = ActionsDAG::merge(std::move(actions), std::move(*dag)); /// Removes unused columns and reorders result. - actions->removeUnusedActions(required_columns.getNames(), false); - actions->addMaterializingOutputActions(); + actions.removeUnusedActions(required_columns.getNames(), false); + actions.addMaterializingOutputActions(); return actions; } diff --git a/src/Interpreters/addMissingDefaults.h b/src/Interpreters/addMissingDefaults.h index 94afd806dfd..5299bae9745 100644 --- a/src/Interpreters/addMissingDefaults.h +++ b/src/Interpreters/addMissingDefaults.h @@ -24,7 +24,7 @@ using ActionsDAGPtr = std::unique_ptr; * Also can substitute NULL with DEFAULT value in case of INSERT SELECT query (null_as_default) if according setting is 1. 
* All three types of columns are materialized (not constants). */ -ActionsDAGPtr addMissingDefaults( +ActionsDAG addMissingDefaults( const Block & header, const NamesAndTypesList & required_columns, const ColumnsDescription & columns, ContextPtr context, bool null_as_default = false); } diff --git a/src/Interpreters/evaluateConstantExpression.cpp b/src/Interpreters/evaluateConstantExpression.cpp index 4e1a2bcf5ee..d5d9fce0dbd 100644 --- a/src/Interpreters/evaluateConstantExpression.cpp +++ b/src/Interpreters/evaluateConstantExpression.cpp @@ -89,7 +89,7 @@ std::optional evaluateConstantExpressionImpl(c ColumnPtr result_column; DataTypePtr result_type; String result_name = ast->getColumnName(); - for (const auto & action_node : actions->getOutputs()) + for (const auto & action_node : actions.getOutputs()) { if ((action_node->result_name == result_name) && action_node->column) { diff --git a/src/Interpreters/inplaceBlockConversions.cpp b/src/Interpreters/inplaceBlockConversions.cpp index b000264ae33..62f8aea86d1 100644 --- a/src/Interpreters/inplaceBlockConversions.cpp +++ b/src/Interpreters/inplaceBlockConversions.cpp @@ -152,22 +152,20 @@ ASTPtr convertRequiredExpressions(Block & block, const NamesAndTypesList & requi return conversion_expr_list; } -ActionsDAGPtr createExpressions( +std::optional createExpressions( const Block & header, ASTPtr expr_list, bool save_unneeded_columns, ContextPtr context) { if (!expr_list) - return nullptr; + return {}; auto syntax_result = TreeRewriter(context).analyze(expr_list, header.getNamesAndTypesList()); auto expression_analyzer = ExpressionAnalyzer{expr_list, syntax_result, context}; - auto dag = std::make_unique(header.getNamesAndTypesList()); + ActionsDAG dag(header.getNamesAndTypesList()); auto actions = expression_analyzer.getActionsDAG(true, !save_unneeded_columns); - dag = ActionsDAG::merge(std::move(*dag), std::move(*actions)); - - return dag; + return ActionsDAG::merge(std::move(dag), std::move(actions)); } } @@ -180,7 +178,7 @@ void performRequiredConversions(Block & block, const NamesAndTypesList & require if (auto dag = createExpressions(block, conversion_expr_list, true, context)) { - auto expression = std::make_shared(std::move(dag), ExpressionActionsSettings::fromContext(context)); + auto expression = std::make_shared(std::move(*dag), ExpressionActionsSettings::fromContext(context)); expression->execute(block); } } @@ -195,7 +193,7 @@ bool needConvertAnyNullToDefault(const Block & header, const NamesAndTypesList & return false; } -ActionsDAGPtr evaluateMissingDefaults( +std::optional evaluateMissingDefaults( const Block & header, const NamesAndTypesList & required_columns, const ColumnsDescription & columns, @@ -204,7 +202,7 @@ ActionsDAGPtr evaluateMissingDefaults( bool null_as_default) { if (!columns.hasDefaults() && (!null_as_default || !needConvertAnyNullToDefault(header, required_columns, columns))) - return nullptr; + return {}; ASTPtr expr_list = defaultRequiredExpressions(header, required_columns, columns, null_as_default); return createExpressions(header, expr_list, save_unneeded_columns, context); diff --git a/src/Interpreters/inplaceBlockConversions.h b/src/Interpreters/inplaceBlockConversions.h index ffc77561e79..570eb75dd4a 100644 --- a/src/Interpreters/inplaceBlockConversions.h +++ b/src/Interpreters/inplaceBlockConversions.h @@ -5,9 +5,6 @@ #include #include -#include -#include - namespace DB { @@ -24,12 +21,11 @@ struct StorageInMemoryMetadata; using StorageMetadataPtr = std::shared_ptr; class ActionsDAG; -using 
ActionsDAGPtr = std::unique_ptr; /// Create actions which adds missing defaults to block according to required_columns using columns description /// or substitute NULL into DEFAULT value in case of INSERT SELECT query (null_as_default) if according setting is 1. /// Return nullptr if no actions required. -ActionsDAGPtr evaluateMissingDefaults( +std::optional evaluateMissingDefaults( const Block & header, const NamesAndTypesList & required_columns, const ColumnsDescription & columns, diff --git a/src/Planner/Planner.cpp b/src/Planner/Planner.cpp index 16ee6de73c4..48e42099ce8 100644 --- a/src/Planner/Planner.cpp +++ b/src/Planner/Planner.cpp @@ -332,12 +332,12 @@ void addExpressionStep(QueryPlan & query_plan, const std::string & step_description, UsefulSets & useful_sets) { - auto actions = ActionsDAG::clone(&expression_actions->dag); + auto actions = std::move(*ActionsDAG::clone(&expression_actions->dag)); if (expression_actions->project_input) - actions->appendInputsForUnusedColumns(query_plan.getCurrentDataStream().header); + actions.appendInputsForUnusedColumns(query_plan.getCurrentDataStream().header); - auto expression_step = std::make_unique(query_plan.getCurrentDataStream(), actions); - appendSetsFromActionsDAG(*expression_step->getExpression(), useful_sets); + auto expression_step = std::make_unique(query_plan.getCurrentDataStream(), std::move(actions)); + appendSetsFromActionsDAG(expression_step->getExpression(), useful_sets); expression_step->setStepDescription(step_description); query_plan.addStep(std::move(expression_step)); } @@ -347,15 +347,15 @@ void addFilterStep(QueryPlan & query_plan, const std::string & step_description, UsefulSets & useful_sets) { - auto actions = ActionsDAG::clone(&filter_analysis_result.filter_actions->dag); + auto actions = std::move(*ActionsDAG::clone(&filter_analysis_result.filter_actions->dag)); if (filter_analysis_result.filter_actions->project_input) - actions->appendInputsForUnusedColumns(query_plan.getCurrentDataStream().header); + actions.appendInputsForUnusedColumns(query_plan.getCurrentDataStream().header); auto where_step = std::make_unique(query_plan.getCurrentDataStream(), - actions, + std::move(actions), filter_analysis_result.filter_column_name, filter_analysis_result.remove_filter_column); - appendSetsFromActionsDAG(*where_step->getExpression(), useful_sets); + appendSetsFromActionsDAG(where_step->getExpression(), useful_sets); where_step->setStepDescription(step_description); query_plan.addStep(std::move(where_step)); } @@ -552,10 +552,10 @@ void addTotalsHavingStep(QueryPlan & query_plan, const auto & having_analysis_result = expression_analysis_result.getHaving(); bool need_finalize = !query_node.isGroupByWithRollup() && !query_node.isGroupByWithCube(); - ActionsDAGPtr actions; + std::optional actions; if (having_analysis_result.filter_actions) { - actions = ActionsDAG::clone(&having_analysis_result.filter_actions->dag); + actions = std::move(*ActionsDAG::clone(&having_analysis_result.filter_actions->dag)); if (having_analysis_result.filter_actions->project_input) actions->appendInputsForUnusedColumns(query_plan.getCurrentDataStream().header); } @@ -564,7 +564,7 @@ void addTotalsHavingStep(QueryPlan & query_plan, query_plan.getCurrentDataStream(), aggregation_analysis_result.aggregate_descriptions, query_analysis_result.aggregate_overflow_row, - actions, + std::move(actions), having_analysis_result.filter_column_name, having_analysis_result.remove_filter_column, settings.totals_mode, @@ -715,13 +715,13 @@ void 
addWithFillStepIfNeeded(QueryPlan & query_plan, if (query_node.hasInterpolate()) { - auto interpolate_actions_dag = std::make_unique(); + ActionsDAG interpolate_actions_dag; auto query_plan_columns = query_plan.getCurrentDataStream().header.getColumnsWithTypeAndName(); for (auto & query_plan_column : query_plan_columns) { /// INTERPOLATE actions dag input columns must be non constant query_plan_column.column = nullptr; - interpolate_actions_dag->addInput(query_plan_column); + interpolate_actions_dag.addInput(query_plan_column); } auto & interpolate_list_node = query_node.getInterpolate()->as(); @@ -729,12 +729,12 @@ void addWithFillStepIfNeeded(QueryPlan & query_plan, if (interpolate_list_nodes.empty()) { - for (const auto * input_node : interpolate_actions_dag->getInputs()) + for (const auto * input_node : interpolate_actions_dag.getInputs()) { if (column_names_with_fill.contains(input_node->result_name)) continue; - interpolate_actions_dag->getOutputs().push_back(input_node); + interpolate_actions_dag.getOutputs().push_back(input_node); } } else @@ -744,12 +744,12 @@ void addWithFillStepIfNeeded(QueryPlan & query_plan, auto & interpolate_node_typed = interpolate_node->as(); PlannerActionsVisitor planner_actions_visitor(planner_context); - auto expression_to_interpolate_expression_nodes = planner_actions_visitor.visit(*interpolate_actions_dag, + auto expression_to_interpolate_expression_nodes = planner_actions_visitor.visit(interpolate_actions_dag, interpolate_node_typed.getExpression()); if (expression_to_interpolate_expression_nodes.size() != 1) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expression to interpolate expected to have single action node"); - auto interpolate_expression_nodes = planner_actions_visitor.visit(*interpolate_actions_dag, + auto interpolate_expression_nodes = planner_actions_visitor.visit(interpolate_actions_dag, interpolate_node_typed.getInterpolateExpression()); if (interpolate_expression_nodes.size() != 1) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Interpolate expression expected to have single action node"); @@ -760,16 +760,16 @@ void addWithFillStepIfNeeded(QueryPlan & query_plan, const auto * interpolate_expression = interpolate_expression_nodes[0]; if (!interpolate_expression->result_type->equals(*expression_to_interpolate->result_type)) { - interpolate_expression = &interpolate_actions_dag->addCast(*interpolate_expression, + interpolate_expression = &interpolate_actions_dag.addCast(*interpolate_expression, expression_to_interpolate->result_type, interpolate_expression->result_name); } - const auto * alias_node = &interpolate_actions_dag->addAlias(*interpolate_expression, expression_to_interpolate_name); - interpolate_actions_dag->getOutputs().push_back(alias_node); + const auto * alias_node = &interpolate_actions_dag.addAlias(*interpolate_expression, expression_to_interpolate_name); + interpolate_actions_dag.getOutputs().push_back(alias_node); } - interpolate_actions_dag->removeUnusedActions(); + interpolate_actions_dag.removeUnusedActions(); } Aliases empty_aliases; @@ -1130,7 +1130,7 @@ void addAdditionalFilterStepIfNeeded(QueryPlan & query_plan, return; auto filter_step = std::make_unique(query_plan.getCurrentDataStream(), - filter_info.actions, + std::move(*filter_info.actions), filter_info.column_name, filter_info.do_remove_column); filter_step->setStepDescription("additional result filter"); @@ -1418,7 +1418,7 @@ void Planner::buildPlanForQueryNode() if (it != table_filters.end()) { const auto & filters = it->second; - 
table_expression_data.setFilterActions(ActionsDAG::clone(filters.filter_actions)); + table_expression_data.setFilterActions(ActionsDAG::clone(&*filters.filter_actions)); table_expression_data.setPrewhereInfo(filters.prewhere_info); } } diff --git a/src/Planner/PlannerActionsVisitor.cpp b/src/Planner/PlannerActionsVisitor.cpp index 59ec7778e21..4c0c9bc7937 100644 --- a/src/Planner/PlannerActionsVisitor.cpp +++ b/src/Planner/PlannerActionsVisitor.cpp @@ -757,12 +757,12 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::vi lambda_arguments_names_and_types.emplace_back(lambda_argument_name, std::move(lambda_argument_type)); } - auto lambda_actions_dag = std::make_unique(); - actions_stack.emplace_back(*lambda_actions_dag, node); + ActionsDAG lambda_actions_dag; + actions_stack.emplace_back(lambda_actions_dag, node); auto [lambda_expression_node_name, levels] = visitImpl(lambda_node.getExpression()); - lambda_actions_dag->getOutputs().push_back(actions_stack.back().getNodeOrThrow(lambda_expression_node_name)); - lambda_actions_dag->removeUnusedActions(Names(1, lambda_expression_node_name)); + lambda_actions_dag.getOutputs().push_back(actions_stack.back().getNodeOrThrow(lambda_expression_node_name)); + lambda_actions_dag.removeUnusedActions(Names(1, lambda_expression_node_name)); auto expression_actions_settings = ExpressionActionsSettings::fromContext(planner_context->getQueryContext(), CompileExpressions::yes); auto lambda_actions = std::make_shared(std::move(lambda_actions_dag), expression_actions_settings); @@ -879,14 +879,14 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::vi const auto & function_node = node->as(); auto function_node_name = action_node_name_helper.calculateActionNodeName(node); - auto index_hint_actions_dag = std::make_unique(); - auto & index_hint_actions_dag_outputs = index_hint_actions_dag->getOutputs(); + ActionsDAG index_hint_actions_dag; + auto & index_hint_actions_dag_outputs = index_hint_actions_dag.getOutputs(); std::unordered_set index_hint_actions_dag_output_node_names; PlannerActionsVisitor actions_visitor(planner_context); for (const auto & argument : function_node.getArguments()) { - auto index_hint_argument_expression_dag_nodes = actions_visitor.visit(*index_hint_actions_dag, argument); + auto index_hint_argument_expression_dag_nodes = actions_visitor.visit(index_hint_actions_dag, argument); for (auto & expression_dag_node : index_hint_argument_expression_dag_nodes) { diff --git a/src/Planner/PlannerContext.h b/src/Planner/PlannerContext.h index 418240fa34e..f35772ef7c0 100644 --- a/src/Planner/PlannerContext.h +++ b/src/Planner/PlannerContext.h @@ -25,7 +25,7 @@ class TableNode; struct FiltersForTableExpression { - ActionsDAGPtr filter_actions; + std::optional filter_actions; PrewhereInfoPtr prewhere_info; }; diff --git a/src/Planner/PlannerJoinTree.cpp b/src/Planner/PlannerJoinTree.cpp index 94054588d40..fa3a3483a8e 100644 --- a/src/Planner/PlannerJoinTree.cpp +++ b/src/Planner/PlannerJoinTree.cpp @@ -591,19 +591,19 @@ UInt64 mainQueryNodeBlockSizeByLimit(const SelectQueryInfo & select_query_info) std::unique_ptr createComputeAliasColumnsStep( const std::unordered_map & alias_column_expressions, const DataStream & current_data_stream) { - ActionsDAGPtr merged_alias_columns_actions_dag = std::make_unique(current_data_stream.header.getColumnsWithTypeAndName()); - ActionsDAG::NodeRawConstPtrs action_dag_outputs = merged_alias_columns_actions_dag->getInputs(); + ActionsDAG 
merged_alias_columns_actions_dag(current_data_stream.header.getColumnsWithTypeAndName()); + ActionsDAG::NodeRawConstPtrs action_dag_outputs = merged_alias_columns_actions_dag.getInputs(); for (const auto & [column_name, alias_column_actions_dag] : alias_column_expressions) { const auto & current_outputs = alias_column_actions_dag->getOutputs(); action_dag_outputs.insert(action_dag_outputs.end(), current_outputs.begin(), current_outputs.end()); - merged_alias_columns_actions_dag->mergeNodes(std::move(*alias_column_actions_dag)); + merged_alias_columns_actions_dag.mergeNodes(std::move(*alias_column_actions_dag)); } for (const auto * output_node : action_dag_outputs) - merged_alias_columns_actions_dag->addOrReplaceInOutputs(*output_node); - merged_alias_columns_actions_dag->removeUnusedActions(false); + merged_alias_columns_actions_dag.addOrReplaceInOutputs(*output_node); + merged_alias_columns_actions_dag.removeUnusedActions(false); auto alias_column_step = std::make_unique(current_data_stream, std::move(merged_alias_columns_actions_dag)); alias_column_step->setStepDescription("Compute alias columns"); @@ -776,7 +776,7 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres if (prewhere_actions) { prewhere_info = std::make_shared(); - prewhere_info->prewhere_actions = ActionsDAG::clone(prewhere_actions); + prewhere_info->prewhere_actions = std::move(*ActionsDAG::clone(prewhere_actions)); prewhere_info->prewhere_column_name = prewhere_actions->getOutputs().at(0)->result_name; prewhere_info->remove_prewhere_column = true; prewhere_info->need_filter = true; @@ -805,14 +805,14 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres if (!prewhere_info->prewhere_actions) { - prewhere_info->prewhere_actions = std::move(filter_info.actions); + prewhere_info->prewhere_actions = std::move(*filter_info.actions); prewhere_info->prewhere_column_name = filter_info.column_name; prewhere_info->remove_prewhere_column = filter_info.do_remove_column; prewhere_info->need_filter = true; } else if (!prewhere_info->row_level_filter) { - prewhere_info->row_level_filter = std::move(filter_info.actions); + prewhere_info->row_level_filter = std::move(*filter_info.actions); prewhere_info->row_level_column_name = filter_info.column_name; prewhere_info->need_filter = true; } @@ -831,7 +831,7 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres auto row_policy_filter_info = buildRowPolicyFilterIfNeeded(storage, table_expression_query_info, planner_context, used_row_policies); if (row_policy_filter_info.actions) - table_expression_data.setRowLevelFilterActions(ActionsDAG::clone(row_policy_filter_info.actions)); + table_expression_data.setRowLevelFilterActions(ActionsDAG::clone(&*row_policy_filter_info.actions)); add_filter(row_policy_filter_info, "Row-level security filter"); if (query_context->getParallelReplicasMode() == Context::ParallelReplicasMode::CUSTOM_KEY) @@ -964,15 +964,14 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres query_plan.addStep(std::move(alias_column_step)); } - for (const auto & filter_info_and_description : where_filters) + for (auto && [filter_info, description] : where_filters) { - const auto & [filter_info, description] = filter_info_and_description; if (query_plan.isInitialized() && from_stage == QueryProcessingStage::FetchColumns && filter_info.actions) { auto filter_step = std::make_unique(query_plan.getCurrentDataStream(), - filter_info.actions, + 
std::move(*filter_info.actions), filter_info.column_name, filter_info.do_remove_column); filter_step->setStepDescription(description); @@ -1063,19 +1062,19 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres if (from_stage == QueryProcessingStage::FetchColumns) { - auto rename_actions_dag = std::make_unique(query_plan.getCurrentDataStream().header.getColumnsWithTypeAndName()); + ActionsDAG rename_actions_dag(query_plan.getCurrentDataStream().header.getColumnsWithTypeAndName()); ActionsDAG::NodeRawConstPtrs updated_actions_dag_outputs; - for (auto & output_node : rename_actions_dag->getOutputs()) + for (auto & output_node : rename_actions_dag.getOutputs()) { const auto * column_identifier = table_expression_data.getColumnIdentifierOrNull(output_node->result_name); if (!column_identifier) continue; - updated_actions_dag_outputs.push_back(&rename_actions_dag->addAlias(*output_node, *column_identifier)); + updated_actions_dag_outputs.push_back(&rename_actions_dag.addAlias(*output_node, *column_identifier)); } - rename_actions_dag->getOutputs() = std::move(updated_actions_dag_outputs); + rename_actions_dag.getOutputs() = std::move(updated_actions_dag_outputs); auto rename_step = std::make_unique(query_plan.getCurrentDataStream(), std::move(rename_actions_dag)); rename_step->setStepDescription("Change column names to column identifiers"); @@ -1117,9 +1116,9 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres void joinCastPlanColumnsToNullable(QueryPlan & plan_to_add_cast, PlannerContextPtr & planner_context, const FunctionOverloadResolverPtr & to_nullable_function) { - auto cast_actions_dag = std::make_unique(plan_to_add_cast.getCurrentDataStream().header.getColumnsWithTypeAndName()); + ActionsDAG cast_actions_dag(plan_to_add_cast.getCurrentDataStream().header.getColumnsWithTypeAndName()); - for (auto & output_node : cast_actions_dag->getOutputs()) + for (auto & output_node : cast_actions_dag.getOutputs()) { if (planner_context->getGlobalPlannerContext()->hasColumnIdentifier(output_node->result_name)) { @@ -1128,11 +1127,11 @@ void joinCastPlanColumnsToNullable(QueryPlan & plan_to_add_cast, PlannerContextP type_to_check = type_to_check_low_cardinality->getDictionaryType(); if (type_to_check->canBeInsideNullable()) - output_node = &cast_actions_dag->addFunction(to_nullable_function, {output_node}, output_node->result_name); + output_node = &cast_actions_dag.addFunction(to_nullable_function, {output_node}, output_node->result_name); } } - cast_actions_dag->appendInputsForUnusedColumns(plan_to_add_cast.getCurrentDataStream().header); + cast_actions_dag.appendInputsForUnusedColumns(plan_to_add_cast.getCurrentDataStream().header); auto cast_join_columns_step = std::make_unique(plan_to_add_cast.getCurrentDataStream(), std::move(cast_actions_dag)); cast_join_columns_step->setStepDescription("Cast JOIN columns to Nullable"); plan_to_add_cast.addStep(std::move(cast_join_columns_step)); @@ -1178,16 +1177,16 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_ join_table_expression, planner_context); - join_clauses_and_actions.left_join_expressions_actions->appendInputsForUnusedColumns(left_plan.getCurrentDataStream().header); - auto left_join_expressions_actions_step = std::make_unique(left_plan.getCurrentDataStream(), join_clauses_and_actions.left_join_expressions_actions); + join_clauses_and_actions.left_join_expressions_actions.appendInputsForUnusedColumns(left_plan.getCurrentDataStream().header); + auto 
left_join_expressions_actions_step = std::make_unique(left_plan.getCurrentDataStream(), std::move(join_clauses_and_actions.left_join_expressions_actions)); left_join_expressions_actions_step->setStepDescription("JOIN actions"); - appendSetsFromActionsDAG(*left_join_expressions_actions_step->getExpression(), left_join_tree_query_plan.useful_sets); + appendSetsFromActionsDAG(left_join_expressions_actions_step->getExpression(), left_join_tree_query_plan.useful_sets); left_plan.addStep(std::move(left_join_expressions_actions_step)); - join_clauses_and_actions.right_join_expressions_actions->appendInputsForUnusedColumns(right_plan.getCurrentDataStream().header); - auto right_join_expressions_actions_step = std::make_unique(right_plan.getCurrentDataStream(), join_clauses_and_actions.right_join_expressions_actions); + join_clauses_and_actions.right_join_expressions_actions.appendInputsForUnusedColumns(right_plan.getCurrentDataStream().header); + auto right_join_expressions_actions_step = std::make_unique(right_plan.getCurrentDataStream(), std::move(join_clauses_and_actions.right_join_expressions_actions)); right_join_expressions_actions_step->setStepDescription("JOIN actions"); - appendSetsFromActionsDAG(*right_join_expressions_actions_step->getExpression(), right_join_tree_query_plan.useful_sets); + appendSetsFromActionsDAG(right_join_expressions_actions_step->getExpression(), right_join_tree_query_plan.useful_sets); right_plan.addStep(std::move(right_join_expressions_actions_step)); } @@ -1225,19 +1224,19 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_ auto join_cast_plan_output_nodes = [&](QueryPlan & plan_to_add_cast, std::unordered_map & plan_column_name_to_cast_type) { - auto cast_actions_dag = std::make_unique(plan_to_add_cast.getCurrentDataStream().header.getColumnsWithTypeAndName()); + ActionsDAG cast_actions_dag(plan_to_add_cast.getCurrentDataStream().header.getColumnsWithTypeAndName()); - for (auto & output_node : cast_actions_dag->getOutputs()) + for (auto & output_node : cast_actions_dag.getOutputs()) { auto it = plan_column_name_to_cast_type.find(output_node->result_name); if (it == plan_column_name_to_cast_type.end()) continue; const auto & cast_type = it->second; - output_node = &cast_actions_dag->addCast(*output_node, cast_type, output_node->result_name); + output_node = &cast_actions_dag.addCast(*output_node, cast_type, output_node->result_name); } - cast_actions_dag->appendInputsForUnusedColumns(plan_to_add_cast.getCurrentDataStream().header); + cast_actions_dag.appendInputsForUnusedColumns(plan_to_add_cast.getCurrentDataStream().header); auto cast_join_columns_step = std::make_unique(plan_to_add_cast.getCurrentDataStream(), std::move(cast_actions_dag)); cast_join_columns_step->setStepDescription("Cast JOIN USING columns"); @@ -1385,7 +1384,7 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_ { ExpressionActionsPtr & mixed_join_expression = table_join->getMixedJoinExpression(); mixed_join_expression = std::make_shared( - std::move(join_clauses_and_actions.mixed_join_expressions_actions), + std::move(*join_clauses_and_actions.mixed_join_expressions_actions), ExpressionActionsSettings::fromContext(planner_context->getQueryContext())); appendSetsFromActionsDAG(mixed_join_expression->getActionsDAG(), left_join_tree_query_plan.useful_sets); @@ -1542,12 +1541,12 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_ result_plan.unitePlans(std::move(join_step), {std::move(plans)}); } - 
auto drop_unused_columns_after_join_actions_dag = std::make_unique(result_plan.getCurrentDataStream().header.getColumnsWithTypeAndName()); + ActionsDAG drop_unused_columns_after_join_actions_dag(result_plan.getCurrentDataStream().header.getColumnsWithTypeAndName()); ActionsDAG::NodeRawConstPtrs drop_unused_columns_after_join_actions_dag_updated_outputs; std::unordered_set drop_unused_columns_after_join_actions_dag_updated_outputs_names; std::optional first_skipped_column_node_index; - auto & drop_unused_columns_after_join_actions_dag_outputs = drop_unused_columns_after_join_actions_dag->getOutputs(); + auto & drop_unused_columns_after_join_actions_dag_outputs = drop_unused_columns_after_join_actions_dag.getOutputs(); size_t drop_unused_columns_after_join_actions_dag_outputs_size = drop_unused_columns_after_join_actions_dag_outputs.size(); for (size_t i = 0; i < drop_unused_columns_after_join_actions_dag_outputs_size; ++i) @@ -1619,7 +1618,7 @@ JoinTreeQueryPlan buildQueryPlanForArrayJoinNode(const QueryTreeNodePtr & array_ auto plan = std::move(join_tree_query_plan.query_plan); auto plan_output_columns = plan.getCurrentDataStream().header.getColumnsWithTypeAndName(); - ActionsDAGPtr array_join_action_dag = std::make_unique(plan_output_columns); + ActionsDAG array_join_action_dag(plan_output_columns); PlannerActionsVisitor actions_visitor(planner_context); std::unordered_set array_join_expressions_output_nodes; @@ -1630,28 +1629,28 @@ JoinTreeQueryPlan buildQueryPlanForArrayJoinNode(const QueryTreeNodePtr & array_ array_join_column_names.insert(array_join_column_identifier); auto & array_join_expression_column = array_join_expression->as(); - auto expression_dag_index_nodes = actions_visitor.visit(*array_join_action_dag, array_join_expression_column.getExpressionOrThrow()); + auto expression_dag_index_nodes = actions_visitor.visit(array_join_action_dag, array_join_expression_column.getExpressionOrThrow()); for (auto & expression_dag_index_node : expression_dag_index_nodes) { - const auto * array_join_column_node = &array_join_action_dag->addAlias(*expression_dag_index_node, array_join_column_identifier); - array_join_action_dag->getOutputs().push_back(array_join_column_node); + const auto * array_join_column_node = &array_join_action_dag.addAlias(*expression_dag_index_node, array_join_column_identifier); + array_join_action_dag.getOutputs().push_back(array_join_column_node); array_join_expressions_output_nodes.insert(array_join_column_node->result_name); } } - array_join_action_dag->appendInputsForUnusedColumns(plan.getCurrentDataStream().header); + array_join_action_dag.appendInputsForUnusedColumns(plan.getCurrentDataStream().header); auto array_join_actions = std::make_unique(plan.getCurrentDataStream(), std::move(array_join_action_dag)); array_join_actions->setStepDescription("ARRAY JOIN actions"); - appendSetsFromActionsDAG(*array_join_actions->getExpression(), join_tree_query_plan.useful_sets); + appendSetsFromActionsDAG(array_join_actions->getExpression(), join_tree_query_plan.useful_sets); plan.addStep(std::move(array_join_actions)); - auto drop_unused_columns_before_array_join_actions_dag = std::make_unique(plan.getCurrentDataStream().header.getColumnsWithTypeAndName()); + ActionsDAG drop_unused_columns_before_array_join_actions_dag(plan.getCurrentDataStream().header.getColumnsWithTypeAndName()); ActionsDAG::NodeRawConstPtrs drop_unused_columns_before_array_join_actions_dag_updated_outputs; std::unordered_set drop_unused_columns_before_array_join_actions_dag_updated_outputs_names; - 
auto & drop_unused_columns_before_array_join_actions_dag_outputs = drop_unused_columns_before_array_join_actions_dag->getOutputs(); + auto & drop_unused_columns_before_array_join_actions_dag_outputs = drop_unused_columns_before_array_join_actions_dag.getOutputs(); size_t drop_unused_columns_before_array_join_actions_dag_outputs_size = drop_unused_columns_before_array_join_actions_dag_outputs.size(); for (size_t i = 0; i < drop_unused_columns_before_array_join_actions_dag_outputs_size; ++i) diff --git a/src/Planner/PlannerJoins.cpp b/src/Planner/PlannerJoins.cpp index 23b6a805ab9..db9678d91a6 100644 --- a/src/Planner/PlannerJoins.cpp +++ b/src/Planner/PlannerJoins.cpp @@ -177,13 +177,13 @@ std::set extractJoinTableSidesFromExpression(//const ActionsDAG:: } const ActionsDAG::Node * appendExpression( - ActionsDAGPtr & dag, + ActionsDAG & dag, const QueryTreeNodePtr & expression, const PlannerContextPtr & planner_context, const JoinNode & join_node) { PlannerActionsVisitor join_expression_visitor(planner_context); - auto join_expression_dag_node_raw_pointers = join_expression_visitor.visit(*dag, expression); + auto join_expression_dag_node_raw_pointers = join_expression_visitor.visit(dag, expression); if (join_expression_dag_node_raw_pointers.size() != 1) throw Exception(ErrorCodes::LOGICAL_ERROR, "JOIN {} ON clause contains multiple expressions", @@ -193,9 +193,9 @@ const ActionsDAG::Node * appendExpression( } void buildJoinClause( - ActionsDAGPtr & left_dag, - ActionsDAGPtr & right_dag, - ActionsDAGPtr & mixed_dag, + ActionsDAG & left_dag, + ActionsDAG & right_dag, + ActionsDAG & mixed_dag, const PlannerContextPtr & planner_context, const QueryTreeNodePtr & join_expression, const TableExpressionSet & left_table_expressions, @@ -376,8 +376,8 @@ JoinClausesAndActions buildJoinClausesAndActions( const JoinNode & join_node, const PlannerContextPtr & planner_context) { - ActionsDAGPtr left_join_actions = std::make_unique(left_table_expression_columns); - ActionsDAGPtr right_join_actions = std::make_unique(right_table_expression_columns); + ActionsDAG left_join_actions(left_table_expression_columns); + ActionsDAG right_join_actions(right_table_expression_columns); ColumnsWithTypeAndName mixed_table_expression_columns; for (const auto & left_column : left_table_expression_columns) { @@ -387,7 +387,7 @@ JoinClausesAndActions buildJoinClausesAndActions( { mixed_table_expression_columns.push_back(right_column); } - ActionsDAGPtr mixed_join_actions = std::make_unique(mixed_table_expression_columns); + ActionsDAG mixed_join_actions(mixed_table_expression_columns); /** It is possible to have constant value in JOIN ON section, that we need to ignore during DAG construction. * If we do not ignore it, this function will be replaced by underlying constant. 
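[Note: the appendExpression()/buildJoinClause() signature changes above swap ActionsDAGPtr & parameters for plain ActionsDAG & references: helpers now append into a caller-owned DAG and hand back raw node pointers that the DAG keeps alive. Below is a rough, self-contained sketch of that contract, assuming made-up Dag/Node types that merely stand in for ActionsDAG and its nodes; it is not ClickHouse code.]

#include <deque>
#include <string>
#include <utility>

// Illustrative stand-ins: nodes live inside the Dag and never move once created,
// so raw pointers returned by helpers stay valid for the Dag's lifetime.
struct Node
{
    std::string result_name;
};

struct Dag
{
    std::deque<Node> nodes;   // deque keeps element addresses stable on push_back

    const Node & add(std::string name)
    {
        nodes.push_back({std::move(name)});
        return nodes.back();
    }
};

// The helper appends into the caller-owned DAG (taken by mutable reference,
// as in the patched buildJoinClause) and returns a pointer owned by that DAG.
const Node * appendExpression(Dag & dag, const std::string & expression)
{
    return &dag.add(expression);
}

int main()
{
    Dag left_join_actions;
    const Node * key = appendExpression(left_join_actions, "equals(t1.a, t2.b)");
    return key->result_name.empty() ? 1 : 0;
}
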
@@ -498,12 +498,12 @@ JoinClausesAndActions buildJoinClausesAndActions( { const ActionsDAG::Node * dag_filter_condition_node = nullptr; if (left_filter_condition_nodes.size() > 1) - dag_filter_condition_node = &left_join_actions->addFunction(and_function, left_filter_condition_nodes, {}); + dag_filter_condition_node = &left_join_actions.addFunction(and_function, left_filter_condition_nodes, {}); else dag_filter_condition_node = left_filter_condition_nodes[0]; join_clause.getLeftFilterConditionNodes() = {dag_filter_condition_node}; - left_join_actions->addOrReplaceInOutputs(*dag_filter_condition_node); + left_join_actions.addOrReplaceInOutputs(*dag_filter_condition_node); add_necessary_name_if_needed(JoinTableSide::Left, dag_filter_condition_node->result_name); } @@ -514,12 +514,12 @@ JoinClausesAndActions buildJoinClausesAndActions( const ActionsDAG::Node * dag_filter_condition_node = nullptr; if (right_filter_condition_nodes.size() > 1) - dag_filter_condition_node = &right_join_actions->addFunction(and_function, right_filter_condition_nodes, {}); + dag_filter_condition_node = &right_join_actions.addFunction(and_function, right_filter_condition_nodes, {}); else dag_filter_condition_node = right_filter_condition_nodes[0]; join_clause.getRightFilterConditionNodes() = {dag_filter_condition_node}; - right_join_actions->addOrReplaceInOutputs(*dag_filter_condition_node); + right_join_actions.addOrReplaceInOutputs(*dag_filter_condition_node); add_necessary_name_if_needed(JoinTableSide::Right, dag_filter_condition_node->result_name); } @@ -556,10 +556,10 @@ JoinClausesAndActions buildJoinClausesAndActions( } if (!left_key_node->result_type->equals(*common_type)) - left_key_node = &left_join_actions->addCast(*left_key_node, common_type, {}); + left_key_node = &left_join_actions.addCast(*left_key_node, common_type, {}); if (!right_key_node->result_type->equals(*common_type)) - right_key_node = &right_join_actions->addCast(*right_key_node, common_type, {}); + right_key_node = &right_join_actions.addCast(*right_key_node, common_type, {}); } if (join_clause.isNullsafeCompareKey(i) && left_key_node->result_type->isNullable() && right_key_node->result_type->isNullable()) @@ -576,24 +576,24 @@ JoinClausesAndActions buildJoinClausesAndActions( * SELECT * FROM t1 JOIN t2 ON tuple(t1.a) == tuple(t2.b) */ auto wrap_nullsafe_function = FunctionFactory::instance().get("tuple", planner_context->getQueryContext()); - left_key_node = &left_join_actions->addFunction(wrap_nullsafe_function, {left_key_node}, {}); - right_key_node = &right_join_actions->addFunction(wrap_nullsafe_function, {right_key_node}, {}); + left_key_node = &left_join_actions.addFunction(wrap_nullsafe_function, {left_key_node}, {}); + right_key_node = &right_join_actions.addFunction(wrap_nullsafe_function, {right_key_node}, {}); } - left_join_actions->addOrReplaceInOutputs(*left_key_node); - right_join_actions->addOrReplaceInOutputs(*right_key_node); + left_join_actions.addOrReplaceInOutputs(*left_key_node); + right_join_actions.addOrReplaceInOutputs(*right_key_node); add_necessary_name_if_needed(JoinTableSide::Left, left_key_node->result_name); add_necessary_name_if_needed(JoinTableSide::Right, right_key_node->result_name); } } - result.left_join_expressions_actions = ActionsDAG::clone(left_join_actions); - result.left_join_tmp_expression_actions = std::move(left_join_actions); - result.left_join_expressions_actions->removeUnusedActions(join_left_actions_names); - result.right_join_expressions_actions = ActionsDAG::clone(right_join_actions); - 
result.right_join_tmp_expression_actions = std::move(right_join_actions); - result.right_join_expressions_actions->removeUnusedActions(join_right_actions_names); + result.left_join_expressions_actions = std::move(left_join_actions); + //result.left_join_tmp_expression_actions = std::move(left_join_actions); + result.left_join_expressions_actions.removeUnusedActions(join_left_actions_names); + result.right_join_expressions_actions = std::move(right_join_actions); + //result.right_join_tmp_expression_actions = std::move(right_join_actions); + result.right_join_expressions_actions.removeUnusedActions(join_right_actions_names); if (is_inequal_join) { @@ -601,16 +601,16 @@ JoinClausesAndActions buildJoinClausesAndActions( /// So, for each column, we recalculate the value of the whole expression from JOIN ON to check if rows should be joined. if (result.join_clauses.size() > 1) { - auto mixed_join_expressions_actions = std::make_unique(mixed_table_expression_columns); + ActionsDAG mixed_join_expressions_actions(mixed_table_expression_columns); PlannerActionsVisitor join_expression_visitor(planner_context); - auto join_expression_dag_node_raw_pointers = join_expression_visitor.visit(*mixed_join_expressions_actions, join_expression); + auto join_expression_dag_node_raw_pointers = join_expression_visitor.visit(mixed_join_expressions_actions, join_expression); if (join_expression_dag_node_raw_pointers.size() != 1) throw Exception( ErrorCodes::LOGICAL_ERROR, "JOIN {} ON clause contains multiple expressions", join_node.formatASTForErrorMessage()); - mixed_join_expressions_actions->addOrReplaceInOutputs(*join_expression_dag_node_raw_pointers[0]); + mixed_join_expressions_actions.addOrReplaceInOutputs(*join_expression_dag_node_raw_pointers[0]); Names required_names{join_expression_dag_node_raw_pointers[0]->result_name}; - mixed_join_expressions_actions->removeUnusedActions(required_names); + mixed_join_expressions_actions.removeUnusedActions(required_names); result.mixed_join_expressions_actions = std::move(mixed_join_expressions_actions); } else diff --git a/src/Planner/PlannerJoins.h b/src/Planner/PlannerJoins.h index 8adf6edd7ea..3735c373acc 100644 --- a/src/Planner/PlannerJoins.h +++ b/src/Planner/PlannerJoins.h @@ -182,15 +182,15 @@ struct JoinClausesAndActions /// Join clauses. Actions dag nodes point into join_expression_actions. JoinClauses join_clauses; /// Whole JOIN ON section expressions - ActionsDAGPtr left_join_tmp_expression_actions; - ActionsDAGPtr right_join_tmp_expression_actions; + // ActionsDAGPtr left_join_tmp_expression_actions; + // ActionsDAGPtr right_join_tmp_expression_actions; /// Left join expressions actions - ActionsDAGPtr left_join_expressions_actions; + ActionsDAG left_join_expressions_actions; /// Right join expressions actions - ActionsDAGPtr right_join_expressions_actions; + ActionsDAG right_join_expressions_actions; /// Originally used for inequal join. it's the total join expression. /// If there is no inequal join conditions, it's null. - ActionsDAGPtr mixed_join_expressions_actions; + std::optional mixed_join_expressions_actions; }; /** Calculate join clauses and actions for JOIN ON section. 
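[Note: the JoinClausesAndActions change above is representative of the whole patch: DAGs that used to travel as ActionsDAGPtr (a unique_ptr) are now held by value, std::optional covers the "may be absent" case (the mixed expressions of an inequal join), and ownership is handed to the consuming step with std::move instead of clone. A minimal, self-contained sketch of that ownership pattern follows; the Dag, JoinActions and Step types are illustrative stand-ins, not ClickHouse classes.]

#include <optional>
#include <string>
#include <utility>
#include <vector>

// Stand-in for ActionsDAG: just a bag of named nodes.
struct Dag
{
    std::vector<std::string> nodes;
};

// Mirrors the reworked JoinClausesAndActions: values instead of unique_ptrs,
// optional only for the part that may legitimately not exist.
struct JoinActions
{
    Dag left_join_expressions_actions;
    Dag right_join_expressions_actions;
    std::optional<Dag> mixed_join_expressions_actions;
};

// Mirrors a step after the patch: takes the DAG by value and moves it in.
struct Step
{
    explicit Step(Dag dag_) : dag(std::move(dag_)) {}
    Dag dag;
};

int main()
{
    JoinActions actions;
    actions.left_join_expressions_actions.nodes = {"t1.a", "t2.b", "equals(t1.a, t2.b)"};

    // Ownership moves into the step; no clone, no heap allocation for the holder.
    Step left_step(std::move(actions.left_join_expressions_actions));
    return left_step.dag.nodes.size() == 3 ? 0 : 1;
}
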
diff --git a/src/Planner/Utils.cpp b/src/Planner/Utils.cpp index 7ac53e0f8c1..e9f9c51d338 100644 --- a/src/Planner/Utils.cpp +++ b/src/Planner/Utils.cpp @@ -442,22 +442,22 @@ FilterDAGInfo buildFilterInfo(QueryTreeNodePtr filter_query_tree, collectSourceColumns(filter_query_tree, planner_context, false /*keep_alias_columns*/); collectSets(filter_query_tree, *planner_context); - auto filter_actions_dag = std::make_unique(); + ActionsDAG filter_actions_dag; PlannerActionsVisitor actions_visitor(planner_context, false /*use_column_identifier_as_action_node_name*/); - auto expression_nodes = actions_visitor.visit(*filter_actions_dag, filter_query_tree); + auto expression_nodes = actions_visitor.visit(filter_actions_dag, filter_query_tree); if (expression_nodes.size() != 1) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Filter actions must return single output node. Actual {}", expression_nodes.size()); - auto & filter_actions_outputs = filter_actions_dag->getOutputs(); + auto & filter_actions_outputs = filter_actions_dag.getOutputs(); filter_actions_outputs = std::move(expression_nodes); std::string filter_node_name = filter_actions_outputs[0]->result_name; bool remove_filter_column = true; - for (const auto & filter_input_node : filter_actions_dag->getInputs()) + for (const auto & filter_input_node : filter_actions_dag.getInputs()) if (table_expression_required_names_without_filter.contains(filter_input_node->result_name)) filter_actions_outputs.push_back(filter_input_node); @@ -498,7 +498,7 @@ void appendSetsFromActionsDAG(const ActionsDAG & dag, UsefulSets & useful_sets) { if (const auto * index_hint = typeid_cast(adaptor->getFunction().get())) { - appendSetsFromActionsDAG(*index_hint->getActions(), useful_sets); + appendSetsFromActionsDAG(index_hint->getActions(), useful_sets); } } } diff --git a/src/Processors/QueryPlan/AggregatingStep.cpp b/src/Processors/QueryPlan/AggregatingStep.cpp index 64ba7f7cd2a..f31de80b22d 100644 --- a/src/Processors/QueryPlan/AggregatingStep.cpp +++ b/src/Processors/QueryPlan/AggregatingStep.cpp @@ -303,15 +303,15 @@ void AggregatingStep::transformPipeline(QueryPipelineBuilder & pipeline, const B const auto & header = ports[set_counter]->getHeader(); /// Here we create a DAG which fills missing keys and adds `__grouping_set` column - auto dag = std::make_unique(header.getColumnsWithTypeAndName()); + ActionsDAG dag(header.getColumnsWithTypeAndName()); ActionsDAG::NodeRawConstPtrs outputs; outputs.reserve(output_header.columns() + 1); auto grouping_col = ColumnConst::create(ColumnUInt64::create(1, set_counter), 0); - const auto * grouping_node = &dag->addColumn( + const auto * grouping_node = &dag.addColumn( {ColumnPtr(std::move(grouping_col)), std::make_shared(), "__grouping_set"}); - grouping_node = &dag->materializeNode(*grouping_node); + grouping_node = &dag.materializeNode(*grouping_node); outputs.push_back(grouping_node); const auto & missing_columns = grouping_sets_params[set_counter].missing_keys; @@ -332,21 +332,21 @@ void AggregatingStep::transformPipeline(QueryPipelineBuilder & pipeline, const B column_with_default->finalize(); auto column = ColumnConst::create(std::move(column_with_default), 0); - const auto * node = &dag->addColumn({ColumnPtr(std::move(column)), col.type, col.name}); - node = &dag->materializeNode(*node); + const auto * node = &dag.addColumn({ColumnPtr(std::move(column)), col.type, col.name}); + node = &dag.materializeNode(*node); outputs.push_back(node); } else { - const auto * column_node = 
dag->getOutputs()[header.getPositionByName(col.name)]; + const auto * column_node = dag.getOutputs()[header.getPositionByName(col.name)]; if (used_it != used_keys.end() && group_by_use_nulls && column_node->result_type->canBeInsideNullable()) - outputs.push_back(&dag->addFunction(to_nullable_function, { column_node }, col.name)); + outputs.push_back(&dag.addFunction(to_nullable_function, { column_node }, col.name)); else outputs.push_back(column_node); } } - dag->getOutputs().swap(outputs); + dag.getOutputs().swap(outputs); auto expression = std::make_shared(std::move(dag), settings.getActionsSettings()); auto transform = std::make_shared(header, expression); diff --git a/src/Processors/QueryPlan/CubeStep.cpp b/src/Processors/QueryPlan/CubeStep.cpp index b6c70061987..3a98f8e4612 100644 --- a/src/Processors/QueryPlan/CubeStep.cpp +++ b/src/Processors/QueryPlan/CubeStep.cpp @@ -36,27 +36,27 @@ CubeStep::CubeStep(const DataStream & input_stream_, Aggregator::Params params_, ProcessorPtr addGroupingSetForTotals(const Block & header, const Names & keys, bool use_nulls, const BuildQueryPipelineSettings & settings, UInt64 grouping_set_number) { - auto dag = std::make_unique(header.getColumnsWithTypeAndName()); - auto & outputs = dag->getOutputs(); + ActionsDAG dag(header.getColumnsWithTypeAndName()); + auto & outputs = dag.getOutputs(); if (use_nulls) { auto to_nullable = FunctionFactory::instance().get("toNullable", nullptr); for (const auto & key : keys) { - const auto * node = dag->getOutputs()[header.getPositionByName(key)]; + const auto * node = dag.getOutputs()[header.getPositionByName(key)]; if (node->result_type->canBeInsideNullable()) { - dag->addOrReplaceInOutputs(dag->addFunction(to_nullable, { node }, node->result_name)); + dag.addOrReplaceInOutputs(dag.addFunction(to_nullable, { node }, node->result_name)); } } } auto grouping_col = ColumnUInt64::create(1, grouping_set_number); - const auto * grouping_node = &dag->addColumn( + const auto * grouping_node = &dag.addColumn( {ColumnPtr(std::move(grouping_col)), std::make_shared(), "__grouping_set"}); - grouping_node = &dag->materializeNode(*grouping_node); + grouping_node = &dag.materializeNode(*grouping_node); outputs.insert(outputs.begin(), grouping_node); auto expression = std::make_shared(std::move(dag), settings.getActionsSettings()); diff --git a/src/Processors/QueryPlan/DistributedCreateLocalPlan.cpp b/src/Processors/QueryPlan/DistributedCreateLocalPlan.cpp index 1f4f271fa6e..1c199ebedb3 100644 --- a/src/Processors/QueryPlan/DistributedCreateLocalPlan.cpp +++ b/src/Processors/QueryPlan/DistributedCreateLocalPlan.cpp @@ -32,7 +32,7 @@ void addConvertingActions(QueryPlan & plan, const Block & header, bool has_missi }; auto convert_actions_dag = get_converting_dag(plan.getCurrentDataStream().header, header); - auto converting = std::make_unique(plan.getCurrentDataStream(), convert_actions_dag); + auto converting = std::make_unique(plan.getCurrentDataStream(), std::move(convert_actions_dag)); plan.addStep(std::move(converting)); } diff --git a/src/Processors/QueryPlan/ExpressionStep.cpp b/src/Processors/QueryPlan/ExpressionStep.cpp index 50bc2e1533e..94098f443d9 100644 --- a/src/Processors/QueryPlan/ExpressionStep.cpp +++ b/src/Processors/QueryPlan/ExpressionStep.cpp @@ -10,33 +10,33 @@ namespace DB { -static ITransformingStep::Traits getTraits(const ActionsDAGPtr & actions, const Block & header, const SortDescription & sort_description) +static ITransformingStep::Traits getTraits(const ActionsDAG & actions, const Block & header, 
const SortDescription & sort_description) { return ITransformingStep::Traits { { .returns_single_stream = false, .preserves_number_of_streams = true, - .preserves_sorting = actions->isSortingPreserved(header, sort_description), + .preserves_sorting = actions.isSortingPreserved(header, sort_description), }, { - .preserves_number_of_rows = !actions->hasArrayJoin(), + .preserves_number_of_rows = !actions.hasArrayJoin(), } }; } -ExpressionStep::ExpressionStep(const DataStream & input_stream_, const ActionsDAGPtr & actions_dag_) +ExpressionStep::ExpressionStep(const DataStream & input_stream_, ActionsDAG actions_dag_) : ITransformingStep( input_stream_, - ExpressionTransform::transformHeader(input_stream_.header, *actions_dag_), + ExpressionTransform::transformHeader(input_stream_.header, actions_dag_), getTraits(actions_dag_, input_stream_.header, input_stream_.sort_description)) - , actions_dag(ActionsDAG::clone(actions_dag_)) + , actions_dag(std::move(actions_dag_)) { } void ExpressionStep::transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings & settings) { - auto expression = std::make_shared(ActionsDAG::clone(actions_dag), settings.getActionsSettings()); + auto expression = std::make_shared(std::move(actions_dag), settings.getActionsSettings()); pipeline.addSimpleTransform([&](const Block & header) { @@ -61,25 +61,25 @@ void ExpressionStep::transformPipeline(QueryPipelineBuilder & pipeline, const Bu void ExpressionStep::describeActions(FormatSettings & settings) const { String prefix(settings.offset, settings.indent_char); - auto expression = std::make_shared(ActionsDAG::clone(actions_dag)); + auto expression = std::make_shared(std::move(*ActionsDAG::clone(&actions_dag))); expression->describeActions(settings.out, prefix); } void ExpressionStep::describeActions(JSONBuilder::JSONMap & map) const { - auto expression = std::make_shared(ActionsDAG::clone(actions_dag)); + auto expression = std::make_shared(std::move(*ActionsDAG::clone(&actions_dag))); map.add("Expression", expression->toTree()); } void ExpressionStep::updateOutputStream() { output_stream = createOutputStream( - input_streams.front(), ExpressionTransform::transformHeader(input_streams.front().header, *actions_dag), getDataStreamTraits()); + input_streams.front(), ExpressionTransform::transformHeader(input_streams.front().header, actions_dag), getDataStreamTraits()); if (!getDataStreamTraits().preserves_sorting) return; - FindAliasForInputName alias_finder(*actions_dag); + FindAliasForInputName alias_finder(actions_dag); const auto & input_sort_description = getInputStreams().front().sort_description; for (size_t i = 0, s = input_sort_description.size(); i < s; ++i) { diff --git a/src/Processors/QueryPlan/ExpressionStep.h b/src/Processors/QueryPlan/ExpressionStep.h index ebbac8217cb..f2926318cbc 100644 --- a/src/Processors/QueryPlan/ExpressionStep.h +++ b/src/Processors/QueryPlan/ExpressionStep.h @@ -1,12 +1,10 @@ #pragma once #include +#include namespace DB { -class ActionsDAG; -using ActionsDAGPtr = std::unique_ptr; - class ExpressionTransform; class JoiningTransform; @@ -15,21 +13,22 @@ class ExpressionStep : public ITransformingStep { public: - explicit ExpressionStep(const DataStream & input_stream_, const ActionsDAGPtr & actions_dag_); + explicit ExpressionStep(const DataStream & input_stream_, ActionsDAG actions_dag_); String getName() const override { return "Expression"; } void transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings & settings) override; void 
describeActions(FormatSettings & settings) const override; - const ActionsDAGPtr & getExpression() const { return actions_dag; } + ActionsDAG & getExpression() { return actions_dag; } + const ActionsDAG & getExpression() const { return actions_dag; } void describeActions(JSONBuilder::JSONMap & map) const override; private: void updateOutputStream() override; - ActionsDAGPtr actions_dag; + ActionsDAG actions_dag; }; } diff --git a/src/Processors/QueryPlan/FilterStep.cpp b/src/Processors/QueryPlan/FilterStep.cpp index 7883461f45a..5f15c5defac 100644 --- a/src/Processors/QueryPlan/FilterStep.cpp +++ b/src/Processors/QueryPlan/FilterStep.cpp @@ -9,9 +9,9 @@ namespace DB { -static ITransformingStep::Traits getTraits(const ActionsDAGPtr & expression, const Block & header, const SortDescription & sort_description, bool remove_filter_column, const String & filter_column_name) +static ITransformingStep::Traits getTraits(const ActionsDAG & expression, const Block & header, const SortDescription & sort_description, bool remove_filter_column, const String & filter_column_name) { - bool preserves_sorting = expression->isSortingPreserved(header, sort_description, remove_filter_column ? filter_column_name : ""); + bool preserves_sorting = expression.isSortingPreserved(header, sort_description, remove_filter_column ? filter_column_name : ""); if (remove_filter_column) { preserves_sorting &= std::find_if( @@ -35,22 +35,22 @@ static ITransformingStep::Traits getTraits(const ActionsDAGPtr & expression, con FilterStep::FilterStep( const DataStream & input_stream_, - const ActionsDAGPtr & actions_dag_, + ActionsDAG actions_dag_, String filter_column_name_, bool remove_filter_column_) : ITransformingStep( input_stream_, FilterTransform::transformHeader( input_stream_.header, - actions_dag_.get(), + &actions_dag_, filter_column_name_, remove_filter_column_), getTraits(actions_dag_, input_stream_.header, input_stream_.sort_description, remove_filter_column_, filter_column_name_)) + , actions_dag(std::move(actions_dag_)) , filter_column_name(std::move(filter_column_name_)) , remove_filter_column(remove_filter_column_) { - actions_dag = ActionsDAG::clone(actions_dag_); - actions_dag->removeAliasesForFilter(filter_column_name); + actions_dag.removeAliasesForFilter(filter_column_name); } void FilterStep::transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings & settings) @@ -87,7 +87,7 @@ void FilterStep::describeActions(FormatSettings & settings) const settings.out << " (removed)"; settings.out << '\n'; - auto expression = std::make_shared(ActionsDAG::clone(actions_dag)); + auto expression = std::make_shared(std::move(*ActionsDAG::clone(&actions_dag))); expression->describeActions(settings.out, prefix); } @@ -96,7 +96,7 @@ void FilterStep::describeActions(JSONBuilder::JSONMap & map) const map.add("Filter Column", filter_column_name); map.add("Removes Filter", remove_filter_column); - auto expression = std::make_shared(ActionsDAG::clone(actions_dag)); + auto expression = std::make_shared(std::move(*ActionsDAG::clone(&actions_dag))); map.add("Expression", expression->toTree()); } @@ -104,13 +104,13 @@ void FilterStep::updateOutputStream() { output_stream = createOutputStream( input_streams.front(), - FilterTransform::transformHeader(input_streams.front().header, actions_dag.get(), filter_column_name, remove_filter_column), + FilterTransform::transformHeader(input_streams.front().header, &actions_dag, filter_column_name, remove_filter_column), getDataStreamTraits()); if 
(!getDataStreamTraits().preserves_sorting) return; - FindAliasForInputName alias_finder(*actions_dag); + FindAliasForInputName alias_finder(actions_dag); const auto & input_sort_description = getInputStreams().front().sort_description; for (size_t i = 0, s = input_sort_description.size(); i < s; ++i) { diff --git a/src/Processors/QueryPlan/FilterStep.h b/src/Processors/QueryPlan/FilterStep.h index 0f894a570b7..b5a31bef5fc 100644 --- a/src/Processors/QueryPlan/FilterStep.h +++ b/src/Processors/QueryPlan/FilterStep.h @@ -1,19 +1,17 @@ #pragma once #include +#include namespace DB { -class ActionsDAG; -using ActionsDAGPtr = std::unique_ptr; - /// Implements WHERE, HAVING operations. See FilterTransform. class FilterStep : public ITransformingStep { public: FilterStep( const DataStream & input_stream_, - const ActionsDAGPtr & actions_dag_, + ActionsDAG actions_dag_, String filter_column_name_, bool remove_filter_column_); @@ -23,15 +21,15 @@ public: void describeActions(JSONBuilder::JSONMap & map) const override; void describeActions(FormatSettings & settings) const override; - const ActionsDAGPtr & getExpression() const { return actions_dag; } - ActionsDAGPtr & getExpression() { return actions_dag; } + const ActionsDAG & getExpression() const { return actions_dag; } + ActionsDAG & getExpression() { return actions_dag; } const String & getFilterColumnName() const { return filter_column_name; } bool removesFilterColumn() const { return remove_filter_column; } private: void updateOutputStream() override; - ActionsDAGPtr actions_dag; + ActionsDAG actions_dag; String filter_column_name; bool remove_filter_column; }; diff --git a/src/Processors/QueryPlan/Optimizations/convertOuterJoinToInnerJoin.cpp b/src/Processors/QueryPlan/Optimizations/convertOuterJoinToInnerJoin.cpp index d90f0e152e7..be468419cfb 100644 --- a/src/Processors/QueryPlan/Optimizations/convertOuterJoinToInnerJoin.cpp +++ b/src/Processors/QueryPlan/Optimizations/convertOuterJoinToInnerJoin.cpp @@ -45,10 +45,10 @@ size_t tryConvertOuterJoinToInnerJoin(QueryPlan::Node * parent_node, QueryPlan:: bool right_stream_safe = true; if (check_left_stream) - left_stream_safe = filter_dag->isFilterAlwaysFalseForDefaultValueInputs(filter_column_name, left_stream_input_header); + left_stream_safe = filter_dag.isFilterAlwaysFalseForDefaultValueInputs(filter_column_name, left_stream_input_header); if (check_right_stream) - right_stream_safe = filter_dag->isFilterAlwaysFalseForDefaultValueInputs(filter_column_name, right_stream_input_header); + right_stream_safe = filter_dag.isFilterAlwaysFalseForDefaultValueInputs(filter_column_name, right_stream_input_header); if (!left_stream_safe || !right_stream_safe) return 0; diff --git a/src/Processors/QueryPlan/Optimizations/distinctReadInOrder.cpp b/src/Processors/QueryPlan/Optimizations/distinctReadInOrder.cpp index 6cdc3cb4eb0..8666912514e 100644 --- a/src/Processors/QueryPlan/Optimizations/distinctReadInOrder.cpp +++ b/src/Processors/QueryPlan/Optimizations/distinctReadInOrder.cpp @@ -79,9 +79,9 @@ size_t tryDistinctReadInOrder(QueryPlan::Node * parent_node) steps_to_update.push_back(step); if (const auto * const expr = typeid_cast(step); expr) - dag_stack.push_back(expr->getExpression().get()); + dag_stack.push_back(&expr->getExpression()); else if (const auto * const filter = typeid_cast(step); filter) - dag_stack.push_back(filter->getExpression().get()); + dag_stack.push_back(&filter->getExpression()); node = node->children.front(); } diff --git 
a/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp b/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp index f26cd79dd97..411b20b1a32 100644 --- a/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp +++ b/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp @@ -101,7 +101,7 @@ static NameSet findIdentifiersOfNode(const ActionsDAG::Node * node) return res; } -static ActionsDAGPtr splitFilter(QueryPlan::Node * parent_node, const Names & available_inputs, size_t child_idx = 0) +static std::optional splitFilter(QueryPlan::Node * parent_node, const Names & available_inputs, size_t child_idx = 0) { QueryPlan::Node * child_node = parent_node->children.front(); checkChildrenSize(child_node, child_idx + 1); @@ -110,16 +110,16 @@ static ActionsDAGPtr splitFilter(QueryPlan::Node * parent_node, const Names & av auto & child = child_node->step; auto * filter = assert_cast(parent.get()); - const auto & expression = filter->getExpression(); + auto & expression = filter->getExpression(); const auto & filter_column_name = filter->getFilterColumnName(); bool removes_filter = filter->removesFilterColumn(); const auto & all_inputs = child->getInputStreams()[child_idx].header.getColumnsWithTypeAndName(); - return expression->splitActionsForFilterPushDown(filter_column_name, removes_filter, available_inputs, all_inputs); + return expression.splitActionsForFilterPushDown(filter_column_name, removes_filter, available_inputs, all_inputs); } static size_t -addNewFilterStepOrThrow(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes, const ActionsDAGPtr & split_filter, +addNewFilterStepOrThrow(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes, ActionsDAG split_filter, bool can_remove_filter = true, size_t child_idx = 0, bool update_parent_filter = true) { QueryPlan::Node * child_node = parent_node->children.front(); @@ -129,14 +129,14 @@ addNewFilterStepOrThrow(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes, auto & child = child_node->step; auto * filter = assert_cast(parent.get()); - const auto & expression = filter->getExpression(); + auto & expression = filter->getExpression(); const auto & filter_column_name = filter->getFilterColumnName(); - const auto * filter_node = expression->tryFindInOutputs(filter_column_name); + const auto * filter_node = expression.tryFindInOutputs(filter_column_name); if (update_parent_filter && !filter_node && !filter->removesFilterColumn()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Filter column {} was removed from ActionsDAG but it is needed in result. DAG:\n{}", - filter_column_name, expression->dumpDAG()); + filter_column_name, expression.dumpDAG()); /// Add new Filter step before Child. /// Expression/Filter -> Child -> Something @@ -147,10 +147,10 @@ addNewFilterStepOrThrow(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes, /// Expression/Filter -> Child -> Filter -> Something /// New filter column is the first one. 
- String split_filter_column_name = split_filter->getOutputs().front()->result_name; + String split_filter_column_name = split_filter.getOutputs().front()->result_name; node.step = std::make_unique( - node.children.at(0)->step->getOutputStream(), split_filter, std::move(split_filter_column_name), can_remove_filter); + node.children.at(0)->step->getOutputStream(), std::move(split_filter), std::move(split_filter_column_name), can_remove_filter); if (auto * transforming_step = dynamic_cast(child.get())) { @@ -176,7 +176,7 @@ addNewFilterStepOrThrow(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes, { /// This means that all predicates of filter were pushed down. /// Replace current actions to expression, as we don't need to filter anything. - parent = std::make_unique(child->getOutputStream(), expression); + parent = std::make_unique(child->getOutputStream(), std::move(expression)); } else { @@ -192,7 +192,7 @@ tryAddNewFilterStep(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes, con bool can_remove_filter = true, size_t child_idx = 0) { if (auto split_filter = splitFilter(parent_node, allowed_inputs, child_idx)) - return addNewFilterStepOrThrow(parent_node, nodes, split_filter, can_remove_filter, child_idx); + return addNewFilterStepOrThrow(parent_node, nodes, std::move(*split_filter), can_remove_filter, child_idx); return 0; } @@ -332,7 +332,7 @@ static size_t tryPushDownOverJoinStep(QueryPlan::Node * parent_node, QueryPlan:: Names left_stream_available_columns_to_push_down = get_available_columns_for_filter(true /*push_to_left_stream*/, left_stream_filter_push_down_input_columns_available); Names right_stream_available_columns_to_push_down = get_available_columns_for_filter(false /*push_to_left_stream*/, right_stream_filter_push_down_input_columns_available); - auto join_filter_push_down_actions = filter->getExpression()->splitActionsForJOINFilterPushDown(filter->getFilterColumnName(), + auto join_filter_push_down_actions = filter->getExpression().splitActionsForJOINFilterPushDown(filter->getFilterColumnName(), filter->removesFilterColumn(), left_stream_available_columns_to_push_down, left_stream_input_header, @@ -346,42 +346,44 @@ static size_t tryPushDownOverJoinStep(QueryPlan::Node * parent_node, QueryPlan:: if (join_filter_push_down_actions.left_stream_filter_to_push_down) { + const auto & result_name = join_filter_push_down_actions.left_stream_filter_to_push_down->getOutputs()[0]->result_name; updated_steps += addNewFilterStepOrThrow(parent_node, nodes, - join_filter_push_down_actions.left_stream_filter_to_push_down, + std::move(*join_filter_push_down_actions.left_stream_filter_to_push_down), join_filter_push_down_actions.left_stream_filter_removes_filter, 0 /*child_idx*/, false /*update_parent_filter*/); LOG_DEBUG(&Poco::Logger::get("QueryPlanOptimizations"), "Pushed down filter {} to the {} side of join", - join_filter_push_down_actions.left_stream_filter_to_push_down->getOutputs()[0]->result_name, + result_name, JoinKind::Left); } if (join_filter_push_down_actions.right_stream_filter_to_push_down && allow_push_down_to_right) { + const auto & result_name = join_filter_push_down_actions.right_stream_filter_to_push_down->getOutputs()[0]->result_name; updated_steps += addNewFilterStepOrThrow(parent_node, nodes, - join_filter_push_down_actions.right_stream_filter_to_push_down, + std::move(*join_filter_push_down_actions.right_stream_filter_to_push_down), join_filter_push_down_actions.right_stream_filter_removes_filter, 1 /*child_idx*/, false /*update_parent_filter*/); 
LOG_DEBUG(&Poco::Logger::get("QueryPlanOptimizations"), "Pushed down filter {} to the {} side of join", - join_filter_push_down_actions.right_stream_filter_to_push_down->getOutputs()[0]->result_name, + result_name, JoinKind::Right); } if (updated_steps > 0) { const auto & filter_column_name = filter->getFilterColumnName(); - const auto & filter_expression = filter->getExpression(); + auto & filter_expression = filter->getExpression(); - const auto * filter_node = filter_expression->tryFindInOutputs(filter_column_name); + const auto * filter_node = filter_expression.tryFindInOutputs(filter_column_name); if (!filter_node && !filter->removesFilterColumn()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Filter column {} was removed from ActionsDAG but it is needed in result. DAG:\n{}", - filter_column_name, filter_expression->dumpDAG()); + filter_column_name, filter_expression.dumpDAG()); /// Filter column was replaced to constant. @@ -391,7 +393,7 @@ static size_t tryPushDownOverJoinStep(QueryPlan::Node * parent_node, QueryPlan:: { /// This means that all predicates of filter were pushed down. /// Replace current actions to expression, as we don't need to filter anything. - parent = std::make_unique(child->getOutputStream(), filter_expression); + parent = std::make_unique(child->getOutputStream(), std::move(filter_expression)); } else { @@ -416,7 +418,7 @@ size_t tryPushDownFilter(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes if (!filter) return 0; - if (filter->getExpression()->hasStatefulFunctions()) + if (filter->getExpression().hasStatefulFunctions()) return 0; if (auto * aggregating = typeid_cast(child.get())) @@ -430,7 +432,7 @@ size_t tryPushDownFilter(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes return 0; const auto & actions = filter->getExpression(); - const auto & filter_node = actions->findInOutputs(filter->getFilterColumnName()); + const auto & filter_node = actions.findInOutputs(filter->getFilterColumnName()); auto identifiers_in_predicate = findIdentifiersOfNode(&filter_node); @@ -597,7 +599,7 @@ size_t tryPushDownFilter(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes filter_node.step = std::make_unique( filter_node.children.front()->step->getOutputStream(), - ActionsDAG::clone(filter->getExpression()), + std::move(*ActionsDAG::clone(&filter->getExpression())), filter->getFilterColumnName(), filter->removesFilterColumn()); } @@ -611,7 +613,7 @@ size_t tryPushDownFilter(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes if (auto * read_from_merge = typeid_cast(child.get())) { - FilterDAGInfo info{ActionsDAG::clone(filter->getExpression()), filter->getFilterColumnName(), filter->removesFilterColumn()}; + FilterDAGInfo info{std::move(*ActionsDAG::clone(&filter->getExpression())), filter->getFilterColumnName(), filter->removesFilterColumn()}; read_from_merge->addFilter(std::move(info)); std::swap(*parent_node, *child_node); return 1; diff --git a/src/Processors/QueryPlan/Optimizations/liftUpArrayJoin.cpp b/src/Processors/QueryPlan/Optimizations/liftUpArrayJoin.cpp index 36aab41df49..0d4f2330119 100644 --- a/src/Processors/QueryPlan/Optimizations/liftUpArrayJoin.cpp +++ b/src/Processors/QueryPlan/Optimizations/liftUpArrayJoin.cpp @@ -28,10 +28,10 @@ size_t tryLiftUpArrayJoin(QueryPlan::Node * parent_node, QueryPlan::Nodes & node const auto & expression = expression_step ? 
expression_step->getExpression() : filter_step->getExpression(); - auto split_actions = expression->splitActionsBeforeArrayJoin(array_join->columns); + auto split_actions = expression.splitActionsBeforeArrayJoin(array_join->columns); /// No actions can be moved before ARRAY JOIN. - if (split_actions.first->trivial()) + if (split_actions.first.trivial()) return 0; auto description = parent->getStepDescription(); @@ -49,9 +49,9 @@ size_t tryLiftUpArrayJoin(QueryPlan::Node * parent_node, QueryPlan::Nodes & node array_join_step->updateInputStream(node.step->getOutputStream()); if (expression_step) - parent = std::make_unique(array_join_step->getOutputStream(), split_actions.second); + parent = std::make_unique(array_join_step->getOutputStream(), std::move(split_actions.second)); else - parent = std::make_unique(array_join_step->getOutputStream(), split_actions.second, + parent = std::make_unique(array_join_step->getOutputStream(), std::move(split_actions.second), filter_step->getFilterColumnName(), filter_step->removesFilterColumn()); parent->setStepDescription(description + " [split]"); diff --git a/src/Processors/QueryPlan/Optimizations/liftUpFunctions.cpp b/src/Processors/QueryPlan/Optimizations/liftUpFunctions.cpp index b280e2d3cc6..7794ddae8fa 100644 --- a/src/Processors/QueryPlan/Optimizations/liftUpFunctions.cpp +++ b/src/Processors/QueryPlan/Optimizations/liftUpFunctions.cpp @@ -66,13 +66,13 @@ size_t tryExecuteFunctionsAfterSorting(QueryPlan::Node * parent_node, QueryPlan: NameSet sort_columns; for (const auto & col : sorting_step->getSortDescription()) sort_columns.insert(col.column_name); - auto [needed_for_sorting, unneeded_for_sorting, _] = expression_step->getExpression()->splitActionsBySortingDescription(sort_columns); + auto [needed_for_sorting, unneeded_for_sorting, _] = expression_step->getExpression().splitActionsBySortingDescription(sort_columns); // No calculations can be postponed. 
- if (unneeded_for_sorting->trivial()) + if (unneeded_for_sorting.trivial()) return 0; - if (!areNodesConvertableToBlock(needed_for_sorting->getOutputs()) || !areNodesConvertableToBlock(unneeded_for_sorting->getInputs())) + if (!areNodesConvertableToBlock(needed_for_sorting.getOutputs()) || !areNodesConvertableToBlock(unneeded_for_sorting.getInputs())) return 0; // Sorting (parent_node) -> Expression (child_node) diff --git a/src/Processors/QueryPlan/Optimizations/liftUpUnion.cpp b/src/Processors/QueryPlan/Optimizations/liftUpUnion.cpp index 4629bc0af53..53f59198d0f 100644 --- a/src/Processors/QueryPlan/Optimizations/liftUpUnion.cpp +++ b/src/Processors/QueryPlan/Optimizations/liftUpUnion.cpp @@ -49,7 +49,7 @@ size_t tryLiftUpUnion(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes) expr_node.step = std::make_unique( expr_node.children.front()->step->getOutputStream(), - ActionsDAG::clone(expression->getExpression())); + std::move(*ActionsDAG::clone(&expression->getExpression()))); } /// - Expression - Something diff --git a/src/Processors/QueryPlan/Optimizations/mergeExpressions.cpp b/src/Processors/QueryPlan/Optimizations/mergeExpressions.cpp index 97de69b1134..d7ca96e4c64 100644 --- a/src/Processors/QueryPlan/Optimizations/mergeExpressions.cpp +++ b/src/Processors/QueryPlan/Optimizations/mergeExpressions.cpp @@ -38,18 +38,18 @@ size_t tryMergeExpressions(QueryPlan::Node * parent_node, QueryPlan::Nodes &) if (parent_expr && child_expr) { - const auto & child_actions = child_expr->getExpression(); - const auto & parent_actions = parent_expr->getExpression(); + auto & child_actions = child_expr->getExpression(); + auto & parent_actions = parent_expr->getExpression(); /// We cannot combine actions with arrayJoin and stateful function because we not always can reorder them. /// Example: select rowNumberInBlock() from (select arrayJoin([1, 2])) /// Such a query will return two zeroes if we combine actions together. 
- if (child_actions->hasArrayJoin() && parent_actions->hasStatefulFunctions()) + if (child_actions.hasArrayJoin() && parent_actions.hasStatefulFunctions()) return 0; - auto merged = ActionsDAG::merge(std::move(*child_actions), std::move(*parent_actions)); + auto merged = ActionsDAG::merge(std::move(child_actions), std::move(parent_actions)); - auto expr = std::make_unique(child_expr->getInputStreams().front(), merged); + auto expr = std::make_unique(child_expr->getInputStreams().front(), std::move(merged)); expr->setStepDescription("(" + parent_expr->getStepDescription() + " + " + child_expr->getStepDescription() + ")"); parent_node->step = std::move(expr); @@ -58,16 +58,16 @@ size_t tryMergeExpressions(QueryPlan::Node * parent_node, QueryPlan::Nodes &) } else if (parent_filter && child_expr) { - const auto & child_actions = child_expr->getExpression(); - const auto & parent_actions = parent_filter->getExpression(); + auto & child_actions = child_expr->getExpression(); + auto & parent_actions = parent_filter->getExpression(); - if (child_actions->hasArrayJoin() && parent_actions->hasStatefulFunctions()) + if (child_actions.hasArrayJoin() && parent_actions.hasStatefulFunctions()) return 0; - auto merged = ActionsDAG::merge(std::move(*child_actions), std::move(*parent_actions)); + auto merged = ActionsDAG::merge(std::move(child_actions), std::move(parent_actions)); auto filter = std::make_unique(child_expr->getInputStreams().front(), - merged, + std::move(merged), parent_filter->getFilterColumnName(), parent_filter->removesFilterColumn()); filter->setStepDescription("(" + parent_filter->getStepDescription() + " + " + child_expr->getStepDescription() + ")"); @@ -78,32 +78,31 @@ size_t tryMergeExpressions(QueryPlan::Node * parent_node, QueryPlan::Nodes &) } else if (parent_filter && child_filter) { - const auto & child_actions = child_filter->getExpression(); - const auto & parent_actions = parent_filter->getExpression(); + auto & child_actions = child_filter->getExpression(); + auto & parent_actions = parent_filter->getExpression(); - if (child_actions->hasArrayJoin()) + if (child_actions.hasArrayJoin()) return 0; - auto actions = ActionsDAG::clone(child_actions); - const auto & child_filter_node = actions->findInOutputs(child_filter->getFilterColumnName()); + const auto & child_filter_node = child_actions.findInOutputs(child_filter->getFilterColumnName()); if (child_filter->removesFilterColumn()) - removeFromOutputs(*actions, child_filter_node); + removeFromOutputs(child_actions, child_filter_node); - actions->mergeInplace(std::move(*ActionsDAG::clone(parent_actions))); + child_actions.mergeInplace(std::move(parent_actions)); - const auto & parent_filter_node = actions->findInOutputs(parent_filter->getFilterColumnName()); + const auto & parent_filter_node = child_actions.findInOutputs(parent_filter->getFilterColumnName()); if (parent_filter->removesFilterColumn()) - removeFromOutputs(*actions, parent_filter_node); + removeFromOutputs(child_actions, parent_filter_node); FunctionOverloadResolverPtr func_builder_and = std::make_unique(std::make_shared()); - const auto & condition = actions->addFunction(func_builder_and, {&child_filter_node, &parent_filter_node}, {}); - auto & outputs = actions->getOutputs(); + const auto & condition = child_actions.addFunction(func_builder_and, {&child_filter_node, &parent_filter_node}, {}); + auto & outputs = child_actions.getOutputs(); outputs.insert(outputs.begin(), &condition); - actions->removeUnusedActions(false); + 
child_actions.removeUnusedActions(false); auto filter = std::make_unique(child_filter->getInputStreams().front(), - actions, + std::move(child_actions), condition.result_name, true); filter->setStepDescription("(" + parent_filter->getStepDescription() + " + " + child_filter->getStepDescription() + ")"); diff --git a/src/Processors/QueryPlan/Optimizations/optimizePrewhere.cpp b/src/Processors/QueryPlan/Optimizations/optimizePrewhere.cpp index f203d831750..0d9e050d6cb 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizePrewhere.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizePrewhere.cpp @@ -83,10 +83,11 @@ void optimizePrewhere(Stack & stack, QueryPlan::Nodes &) Names queried_columns = source_step_with_filter->requiredSourceColumns(); + const auto & source_filter_actions_dag = source_step_with_filter->getFilterActionsDAG(); MergeTreeWhereOptimizer where_optimizer{ std::move(column_compressed_sizes), storage_metadata, - storage.getConditionSelectivityEstimatorByPredicate(storage_snapshot, source_step_with_filter->getFilterActionsDAG(), context), + storage.getConditionSelectivityEstimatorByPredicate(storage_snapshot, source_filter_actions_dag ? &*source_filter_actions_dag : nullptr, context), queried_columns, storage.supportedPrewhereColumns(), getLogger("QueryPlanOptimizePrewhere")}; @@ -113,15 +114,15 @@ void optimizePrewhere(Stack & stack, QueryPlan::Nodes &) if (prewhere_info->remove_prewhere_column) { - removeFromOutput(*filter_expression, filter_column_name); - auto & outputs = filter_expression->getOutputs(); + removeFromOutput(filter_expression, filter_column_name); + auto & outputs = filter_expression.getOutputs(); size_t size = outputs.size(); outputs.insert(outputs.end(), optimize_result.prewhere_nodes.begin(), optimize_result.prewhere_nodes.end()); - filter_expression->removeUnusedActions(false); + filter_expression.removeUnusedActions(false); outputs.resize(size); } - auto split_result = filter_expression->split(optimize_result.prewhere_nodes, true, true); + auto split_result = filter_expression.split(optimize_result.prewhere_nodes, true, true); /// This is the leak of abstraction. /// Splited actions may have inputs which are needed only for PREWHERE. @@ -137,15 +138,15 @@ void optimizePrewhere(Stack & stack, QueryPlan::Nodes &) /// So, here we restore removed inputs for PREWHERE actions { std::unordered_set first_outputs( - split_result.first->getOutputs().begin(), split_result.first->getOutputs().end()); - for (const auto * input : split_result.first->getInputs()) + split_result.first.getOutputs().begin(), split_result.first.getOutputs().end()); + for (const auto * input : split_result.first.getInputs()) { if (!first_outputs.contains(input)) { - split_result.first->getOutputs().push_back(input); + split_result.first.getOutputs().push_back(input); /// Add column to second actions as input. /// Do not add it to result, so it would be removed. 
- split_result.second->addInput(input->result_name, input->result_type); + split_result.second.addInput(input->result_name, input->result_type); } } } diff --git a/src/Processors/QueryPlan/Optimizations/optimizePrimaryKeyConditionAndLimit.cpp b/src/Processors/QueryPlan/Optimizations/optimizePrimaryKeyConditionAndLimit.cpp index 0afddede708..71a7ca327b1 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizePrimaryKeyConditionAndLimit.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizePrimaryKeyConditionAndLimit.cpp @@ -18,16 +18,16 @@ void optimizePrimaryKeyConditionAndLimit(const Stack & stack) const auto & storage_prewhere_info = source_step_with_filter->getPrewhereInfo(); if (storage_prewhere_info) { - source_step_with_filter->addFilter(ActionsDAG::clone(storage_prewhere_info->prewhere_actions), storage_prewhere_info->prewhere_column_name); + source_step_with_filter->addFilter(ActionsDAG::clone(&*storage_prewhere_info->prewhere_actions), storage_prewhere_info->prewhere_column_name); if (storage_prewhere_info->row_level_filter) - source_step_with_filter->addFilter(ActionsDAG::clone(storage_prewhere_info->row_level_filter), storage_prewhere_info->row_level_column_name); + source_step_with_filter->addFilter(ActionsDAG::clone(&*storage_prewhere_info->row_level_filter), storage_prewhere_info->row_level_column_name); } for (auto iter = stack.rbegin() + 1; iter != stack.rend(); ++iter) { if (auto * filter_step = typeid_cast(iter->node->step.get())) { - source_step_with_filter->addFilter(ActionsDAG::clone(filter_step->getExpression()), filter_step->getFilterColumnName()); + source_step_with_filter->addFilter(ActionsDAG::clone(&filter_step->getExpression()), filter_step->getFilterColumnName()); } else if (auto * limit_step = typeid_cast(iter->node->step.get())) { diff --git a/src/Processors/QueryPlan/Optimizations/optimizeReadInOrder.cpp b/src/Processors/QueryPlan/Optimizations/optimizeReadInOrder.cpp index a8bd98d7460..b3747b81215 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizeReadInOrder.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizeReadInOrder.cpp @@ -170,12 +170,12 @@ static void appendFixedColumnsFromFilterExpression(const ActionsDAG::Node & filt } } -static void appendExpression(ActionsDAGPtr & dag, const ActionsDAGPtr & expression) +static void appendExpression(ActionsDAGPtr & dag, const ActionsDAG & expression) { if (dag) - dag->mergeInplace(std::move(*ActionsDAG::clone(expression))); + dag->mergeInplace(std::move(*ActionsDAG::clone(&expression))); else - dag = ActionsDAG::clone(expression); + dag = ActionsDAG::clone(&expression); } /// This function builds a common DAG which is a merge of DAGs from Filter and Expression steps chain. @@ -193,7 +193,7 @@ void buildSortingDAG(QueryPlan::Node & node, ActionsDAGPtr & dag, FixedColumns & if (prewhere_info->prewhere_actions) { //std::cerr << "====== Adding prewhere " << std::endl; - appendExpression(dag, prewhere_info->prewhere_actions); + appendExpression(dag, *prewhere_info->prewhere_actions); if (const auto * filter_expression = dag->tryFindInOutputs(prewhere_info->prewhere_column_name)) appendFixedColumnsFromFilterExpression(*filter_expression, fixed_columns); } @@ -211,7 +211,7 @@ void buildSortingDAG(QueryPlan::Node & node, ActionsDAGPtr & dag, FixedColumns & const auto & actions = expression->getExpression(); /// Should ignore limit because arrayJoin() can reduce the number of rows in case of empty array. 
- if (actions->hasArrayJoin()) + if (actions.hasArrayJoin()) limit = 0; appendExpression(dag, actions); @@ -1066,13 +1066,13 @@ size_t tryReuseStorageOrderingForWindowFunctions(QueryPlan::Node * parent_node, for (const auto & actions_dag : window_desc.partition_by_actions) { order_by_elements_actions.emplace_back( - std::make_shared(ActionsDAG::clone(actions_dag.get()), ExpressionActionsSettings::fromContext(context, CompileExpressions::yes))); + std::make_shared(std::move(*ActionsDAG::clone(actions_dag.get())), ExpressionActionsSettings::fromContext(context, CompileExpressions::yes))); } for (const auto & actions_dag : window_desc.order_by_actions) { order_by_elements_actions.emplace_back( - std::make_shared(ActionsDAG::clone(actions_dag.get()), ExpressionActionsSettings::fromContext(context, CompileExpressions::yes))); + std::make_shared(std::move(*ActionsDAG::clone(actions_dag.get())), ExpressionActionsSettings::fromContext(context, CompileExpressions::yes))); } auto order_optimizer = std::make_shared( diff --git a/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp b/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp index da057bd25c2..34e9c8aac0e 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp @@ -273,7 +273,7 @@ static void appendAggregateFunctions( } } -ActionsDAGPtr analyzeAggregateProjection( +std::optional analyzeAggregateProjection( const AggregateProjectionInfo & info, const QueryDAG & query, const DAGIndex & query_index, @@ -393,7 +393,7 @@ ActionsDAGPtr analyzeAggregateProjection( // LOG_TRACE(getLogger("optimizeUseProjections"), "Folding actions by projection"); auto proj_dag = query.dag->foldActionsByProjection(new_inputs, query_key_nodes); - appendAggregateFunctions(*proj_dag, aggregates, *matched_aggregates); + appendAggregateFunctions(proj_dag, aggregates, *matched_aggregates); return proj_dag; } @@ -405,7 +405,7 @@ struct AggregateProjectionCandidate : public ProjectionCandidate /// Actions which need to be applied to columns from projection /// in order to get all the columns required for aggregation. - ActionsDAGPtr dag; + ActionsDAG dag; }; struct MinMaxProjectionCandidate @@ -480,13 +480,13 @@ AggregateProjectionCandidates getAggregateProjectionCandidates( if (auto proj_dag = analyzeAggregateProjection(info, dag, query_index, keys, aggregates)) { // LOG_TRACE(getLogger("optimizeUseProjections"), "Projection analyzed DAG {}", proj_dag->dumpDAG()); - AggregateProjectionCandidate candidate{.info = std::move(info), .dag = std::move(proj_dag)}; + AggregateProjectionCandidate candidate{.info = std::move(info), .dag = std::move(*proj_dag)}; // LOG_TRACE(getLogger("optimizeUseProjections"), "Projection sample block {}", sample_block.dumpStructure()); auto block = reading.getMergeTreeData().getMinMaxCountProjectionBlock( metadata, - candidate.dag->getRequiredColumnsNames(), - (dag.filter_node ? dag.dag.get() : nullptr), + candidate.dag.getRequiredColumnsNames(), + (dag.filter_node ? 
&*dag.dag : nullptr), parts, max_added_blocks.get(), context); @@ -536,7 +536,7 @@ AggregateProjectionCandidates getAggregateProjectionCandidates( if (auto proj_dag = analyzeAggregateProjection(info, dag, query_index, keys, aggregates)) { // LOG_TRACE(getLogger("optimizeUseProjections"), "Projection analyzed DAG {}", proj_dag->dumpDAG()); - AggregateProjectionCandidate candidate{.info = std::move(info), .dag = std::move(proj_dag)}; + AggregateProjectionCandidate candidate{.info = std::move(info), .dag = std::move(*proj_dag)}; candidate.projection = projection; candidates.real.emplace_back(std::move(candidate)); } @@ -664,7 +664,7 @@ std::optional optimizeUseAggregateProjections(QueryPlan::Node & node, Qu /// Selecting best candidate. for (auto & candidate : candidates.real) { - auto required_column_names = candidate.dag->getRequiredColumnsNames(); + auto required_column_names = candidate.dag.getRequiredColumnsNames(); bool analyzed = analyzeProjectionCandidate( candidate, @@ -675,7 +675,7 @@ std::optional optimizeUseAggregateProjections(QueryPlan::Node & node, Qu query_info, context, max_added_blocks, - candidate.dag.get()); + &candidate.dag); if (!analyzed) continue; @@ -765,7 +765,7 @@ std::optional optimizeUseAggregateProjections(QueryPlan::Node & node, Qu projection_reading = reader.readFromParts( /* parts = */ {}, /* alter_conversions = */ {}, - best_candidate->dag->getRequiredColumnsNames(), + best_candidate->dag.getRequiredColumnsNames(), proj_snapshot, projection_query_info, context, @@ -777,7 +777,7 @@ std::optional optimizeUseAggregateProjections(QueryPlan::Node & node, Qu if (!projection_reading) { - auto header = proj_snapshot->getSampleBlockForColumns(best_candidate->dag->getRequiredColumnsNames()); + auto header = proj_snapshot->getSampleBlockForColumns(best_candidate->dag.getRequiredColumnsNames()); Pipe pipe(std::make_shared(std::move(header))); projection_reading = std::make_unique(std::move(pipe)); } @@ -808,17 +808,19 @@ std::optional optimizeUseAggregateProjections(QueryPlan::Node & node, Qu if (best_candidate) { aggregate_projection_node = &nodes.emplace_back(); + if (candidates.has_filter) { + const auto & result_name = best_candidate->dag.getOutputs().front()->result_name; aggregate_projection_node->step = std::make_unique( projection_reading_node.step->getOutputStream(), - best_candidate->dag, - best_candidate->dag->getOutputs().front()->result_name, + std::move(best_candidate->dag), + result_name, true); } else aggregate_projection_node->step - = std::make_unique(projection_reading_node.step->getOutputStream(), best_candidate->dag); + = std::make_unique(projection_reading_node.step->getOutputStream(), std::move(best_candidate->dag)); aggregate_projection_node->children.push_back(&projection_reading_node); } diff --git a/src/Processors/QueryPlan/Optimizations/optimizeUseNormalProjection.cpp b/src/Processors/QueryPlan/Optimizations/optimizeUseNormalProjection.cpp index c7e96d66817..c0af178f08e 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizeUseNormalProjection.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizeUseNormalProjection.cpp @@ -23,7 +23,7 @@ struct NormalProjectionCandidate : public ProjectionCandidate { }; -static ActionsDAGPtr makeMaterializingDAG(const Block & proj_header, const Block main_header) +static std::optional makeMaterializingDAG(const Block & proj_header, const Block main_header) { /// Materialize constants in case we don't have it in output header. /// This may happen e.g. if we have PREWHERE. 
@@ -31,7 +31,7 @@ static ActionsDAGPtr makeMaterializingDAG(const Block & proj_header, const Block size_t num_columns = main_header.columns(); /// This is a error; will have block structure mismatch later. if (proj_header.columns() != num_columns) - return nullptr; + return {}; std::vector const_positions; for (size_t i = 0; i < num_columns; ++i) @@ -45,17 +45,17 @@ static ActionsDAGPtr makeMaterializingDAG(const Block & proj_header, const Block } if (const_positions.empty()) - return nullptr; + return {}; - ActionsDAGPtr dag = std::make_unique(); - auto & outputs = dag->getOutputs(); + ActionsDAG dag; + auto & outputs = dag.getOutputs(); for (const auto & col : proj_header.getColumnsWithTypeAndName()) - outputs.push_back(&dag->addInput(col)); + outputs.push_back(&dag.addInput(col)); for (auto pos : const_positions) { auto & output = outputs[pos]; - output = &dag->materializeNode(*output); + output = &dag.materializeNode(*output); } return dag; @@ -172,7 +172,7 @@ std::optional optimizeUseNormalProjections(Stack & stack, QueryPlan::Nod query_info, context, max_added_blocks, - query.filter_node ? query.dag.get() : nullptr); + query.filter_node ? &*query.dag : nullptr); if (!analyzed) continue; @@ -242,14 +242,14 @@ std::optional optimizeUseNormalProjections(Stack & stack, QueryPlan::Nod { expr_or_filter_node.step = std::make_unique( projection_reading_node.step->getOutputStream(), - query.dag, + std::move(*query.dag), query.filter_node->result_name, true); } else expr_or_filter_node.step = std::make_unique( projection_reading_node.step->getOutputStream(), - query.dag); + std::move(*query.dag)); expr_or_filter_node.children.push_back(&projection_reading_node); next_node = &expr_or_filter_node; @@ -267,7 +267,7 @@ std::optional optimizeUseNormalProjections(Stack & stack, QueryPlan::Nod if (auto materializing = makeMaterializingDAG(proj_stream->header, main_stream.header)) { - auto converting = std::make_unique(*proj_stream, materializing); + auto converting = std::make_unique(*proj_stream, std::move(*materializing)); proj_stream = &converting->getOutputStream(); auto & expr_node = nodes.emplace_back(); expr_node.step = std::move(converting); diff --git a/src/Processors/QueryPlan/Optimizations/projectionsCommon.cpp b/src/Processors/QueryPlan/Optimizations/projectionsCommon.cpp index 0e2ad96a419..fb2e6c2096e 100644 --- a/src/Processors/QueryPlan/Optimizations/projectionsCommon.cpp +++ b/src/Processors/QueryPlan/Optimizations/projectionsCommon.cpp @@ -64,12 +64,12 @@ std::shared_ptr getMaxAddedBlocks(ReadFromMergeTree * rea return {}; } -void QueryDAG::appendExpression(const ActionsDAGPtr & expression) +void QueryDAG::appendExpression(const ActionsDAG & expression) { if (dag) - dag->mergeInplace(std::move(*ActionsDAG::clone(expression))); + dag->mergeInplace(std::move(*ActionsDAG::clone(&expression))); else - dag = ActionsDAG::clone(expression); + dag = std::move(*ActionsDAG::clone(&expression)); } const ActionsDAG::Node * findInOutputs(ActionsDAG & dag, const std::string & name, bool remove) @@ -120,7 +120,7 @@ bool QueryDAG::buildImpl(QueryPlan::Node & node, ActionsDAG::NodeRawConstPtrs & { if (prewhere_info->row_level_filter) { - appendExpression(prewhere_info->row_level_filter); + appendExpression(*prewhere_info->row_level_filter); if (const auto * filter_expression = findInOutputs(*dag, prewhere_info->row_level_column_name, false)) filter_nodes.push_back(filter_expression); else @@ -129,7 +129,7 @@ bool QueryDAG::buildImpl(QueryPlan::Node & node, ActionsDAG::NodeRawConstPtrs & if 
(prewhere_info->prewhere_actions) { - appendExpression(prewhere_info->prewhere_actions); + appendExpression(*prewhere_info->prewhere_actions); if (const auto * filter_expression = findInOutputs(*dag, prewhere_info->prewhere_column_name, prewhere_info->remove_prewhere_column)) filter_nodes.push_back(filter_expression); @@ -149,7 +149,7 @@ bool QueryDAG::buildImpl(QueryPlan::Node & node, ActionsDAG::NodeRawConstPtrs & if (auto * expression = typeid_cast(step)) { const auto & actions = expression->getExpression(); - if (actions->hasArrayJoin()) + if (actions.hasArrayJoin()) return false; appendExpression(actions); @@ -159,7 +159,7 @@ bool QueryDAG::buildImpl(QueryPlan::Node & node, ActionsDAG::NodeRawConstPtrs & if (auto * filter = typeid_cast(step)) { const auto & actions = filter->getExpression(); - if (actions->hasArrayJoin()) + if (actions.hasArrayJoin()) return false; appendExpression(actions); diff --git a/src/Processors/QueryPlan/Optimizations/projectionsCommon.h b/src/Processors/QueryPlan/Optimizations/projectionsCommon.h index 59ad3a43b97..ee0dfddc326 100644 --- a/src/Processors/QueryPlan/Optimizations/projectionsCommon.h +++ b/src/Processors/QueryPlan/Optimizations/projectionsCommon.h @@ -25,14 +25,14 @@ std::shared_ptr getMaxAddedBlocks(ReadFromMergeTree * rea /// Additionally, for all the Filter steps, we collect filter conditions into filter_nodes. struct QueryDAG { - ActionsDAGPtr dag; + std::optional dag; const ActionsDAG::Node * filter_node = nullptr; bool build(QueryPlan::Node & node); private: bool buildImpl(QueryPlan::Node & node, ActionsDAG::NodeRawConstPtrs & filter_nodes); - void appendExpression(const ActionsDAGPtr & expression); + void appendExpression(const ActionsDAG & expression); }; struct ProjectionCandidate diff --git a/src/Processors/QueryPlan/Optimizations/removeRedundantDistinct.cpp b/src/Processors/QueryPlan/Optimizations/removeRedundantDistinct.cpp index 81a8a537830..d0acd8221d4 100644 --- a/src/Processors/QueryPlan/Optimizations/removeRedundantDistinct.cpp +++ b/src/Processors/QueryPlan/Optimizations/removeRedundantDistinct.cpp @@ -132,10 +132,10 @@ namespace return true; if (const auto * const expr = typeid_cast(step); expr) - return !expr->getExpression()->hasArrayJoin(); + return !expr->getExpression().hasArrayJoin(); if (const auto * const filter = typeid_cast(step); filter) - return !filter->getExpression()->hasArrayJoin(); + return !filter->getExpression().hasArrayJoin(); if (typeid_cast(step) || typeid_cast(step) || typeid_cast(step) || typeid_cast(step)) @@ -183,9 +183,9 @@ namespace } if (const auto * const expr = typeid_cast(current_step); expr) - dag_stack.push_back(expr->getExpression().get()); + dag_stack.push_back(&expr->getExpression()); else if (const auto * const filter = typeid_cast(current_step); filter) - dag_stack.push_back(filter->getExpression().get()); + dag_stack.push_back(&filter->getExpression()); node = node->children.front(); if (inner_distinct_step = typeid_cast(node->step.get()); inner_distinct_step) @@ -236,9 +236,9 @@ namespace } if (const auto * const expr = typeid_cast(current_step); expr) - dag_stack.push_back(expr->getExpression().get()); + dag_stack.push_back(&expr->getExpression()); else if (const auto * const filter = typeid_cast(current_step); filter) - dag_stack.push_back(filter->getExpression().get()); + dag_stack.push_back(&filter->getExpression()); node = node->children.front(); inner_distinct_step = typeid_cast(node->step.get()); diff --git a/src/Processors/QueryPlan/Optimizations/removeRedundantSorting.cpp 
b/src/Processors/QueryPlan/Optimizations/removeRedundantSorting.cpp index 632eba6ab5f..7cac7bee6ec 100644 --- a/src/Processors/QueryPlan/Optimizations/removeRedundantSorting.cpp +++ b/src/Processors/QueryPlan/Optimizations/removeRedundantSorting.cpp @@ -213,12 +213,12 @@ private: logStep("checking for stateful function", node); if (const auto * expr = typeid_cast(step); expr) { - if (expr->getExpression()->hasStatefulFunctions()) + if (expr->getExpression().hasStatefulFunctions()) return false; } else if (const auto * filter = typeid_cast(step); filter) { - if (filter->getExpression()->hasStatefulFunctions()) + if (filter->getExpression().hasStatefulFunctions()) return false; } else diff --git a/src/Processors/QueryPlan/Optimizations/splitFilter.cpp b/src/Processors/QueryPlan/Optimizations/splitFilter.cpp index 561ad7302c6..6aed57634b0 100644 --- a/src/Processors/QueryPlan/Optimizations/splitFilter.cpp +++ b/src/Processors/QueryPlan/Optimizations/splitFilter.cpp @@ -17,13 +17,13 @@ size_t trySplitFilter(QueryPlan::Node * node, QueryPlan::Nodes & nodes) const std::string & filter_column_name = filter_step->getFilterColumnName(); /// Do not split if there are function like runningDifference. - if (expr->hasStatefulFunctions()) + if (expr.hasStatefulFunctions()) return 0; bool filter_name_clashs_with_input = false; if (filter_step->removesFilterColumn()) { - for (const auto * input : expr->getInputs()) + for (const auto * input : expr.getInputs()) { if (input->result_name == filter_column_name) { @@ -33,14 +33,14 @@ size_t trySplitFilter(QueryPlan::Node * node, QueryPlan::Nodes & nodes) } } - auto split = expr->splitActionsForFilter(filter_column_name); + auto split = expr.splitActionsForFilter(filter_column_name); - if (split.second->trivial()) + if (split.second.trivial()) return 0; bool remove_filter = false; if (filter_step->removesFilterColumn()) - remove_filter = split.second->removeUnusedResult(filter_column_name); + remove_filter = split.second.removeUnusedResult(filter_column_name); auto description = filter_step->getStepDescription(); @@ -53,11 +53,11 @@ size_t trySplitFilter(QueryPlan::Node * node, QueryPlan::Nodes & nodes) { split_filter_name = "__split_filter"; - for (auto & filter_output : split.first->getOutputs()) + for (auto & filter_output : split.first.getOutputs()) { if (filter_output->result_name == filter_column_name) { - filter_output = &split.first->addAlias(*filter_output, split_filter_name); + filter_output = &split.first.addAlias(*filter_output, split_filter_name); break; } } diff --git a/src/Processors/QueryPlan/Optimizations/useDataParallelAggregation.cpp b/src/Processors/QueryPlan/Optimizations/useDataParallelAggregation.cpp index 124cb735d5a..7e0260c0040 100644 --- a/src/Processors/QueryPlan/Optimizations/useDataParallelAggregation.cpp +++ b/src/Processors/QueryPlan/Optimizations/useDataParallelAggregation.cpp @@ -146,16 +146,16 @@ bool allOutputsDependsOnlyOnAllowedNodes( /// 3. We match partition key actions with group by key actions to find col1', ..., coln' in partition key actions. /// 4. We check that partition key is indeed a deterministic function of col1', ..., coln'. 
 bool isPartitionKeySuitsGroupByKey(
-    const ReadFromMergeTree & reading, const ActionsDAGPtr & group_by_actions, const AggregatingStep & aggregating)
+    const ReadFromMergeTree & reading, const ActionsDAG & group_by_actions, const AggregatingStep & aggregating)
 {
     if (aggregating.isGroupingSets())
         return false;
-    if (group_by_actions->hasArrayJoin() || group_by_actions->hasStatefulFunctions() || group_by_actions->hasNonDeterministic())
+    if (group_by_actions.hasArrayJoin() || group_by_actions.hasStatefulFunctions() || group_by_actions.hasNonDeterministic())
         return false;
     /// We are interested only in calculations required to obtain group by keys (and not aggregate function arguments for example).
-    auto key_nodes = group_by_actions->findInOutpus(aggregating.getParams().keys);
+    auto key_nodes = group_by_actions.findInOutpus(aggregating.getParams().keys);
     auto group_by_key_actions = ActionsDAG::cloneSubDAG(key_nodes, /*remove_aliases=*/ true);
     const auto & gb_key_required_columns = group_by_key_actions->getRequiredColumnsNames();
diff --git a/src/Processors/QueryPlan/PartsSplitter.cpp b/src/Processors/QueryPlan/PartsSplitter.cpp
index ed4b1906635..a12fce95b10 100644
--- a/src/Processors/QueryPlan/PartsSplitter.cpp
+++ b/src/Processors/QueryPlan/PartsSplitter.cpp
@@ -943,7 +943,7 @@ SplitPartsWithRangesByPrimaryKeyResult splitPartsWithRangesByPrimaryKey(
         auto syntax_result = TreeRewriter(context).analyze(filter_function, primary_key.expression->getRequiredColumnsWithTypes());
         auto actions = ExpressionAnalyzer(filter_function, syntax_result, context).getActionsDAG(false);
-        reorderColumns(*actions, result.merging_pipes[i].getHeader(), filter_function->getColumnName());
+        reorderColumns(actions, result.merging_pipes[i].getHeader(), filter_function->getColumnName());
         ExpressionActionsPtr expression_actions = std::make_shared<ExpressionActions>(std::move(actions));
         auto description = fmt::format(
             "filter values in ({}, {}]", i ? ::toString(borders[i - 1]) : "-inf", i < borders.size() ? 
::toString(borders[i]) : "+inf"); diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index eca3cc54ce9..bc878e7ee49 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -799,7 +799,7 @@ Pipe ReadFromMergeTree::spreadMarkRangesAmongStreams(RangesInDataParts && parts_ info.use_uncompressed_cache); }; - auto sorting_expr = std::make_shared(ActionsDAG::clone(&metadata_for_reading->getSortingKey().expression->getActionsDAG())); + auto sorting_expr = std::make_shared(std::move(*ActionsDAG::clone(&metadata_for_reading->getSortingKey().expression->getActionsDAG()))); SplitPartsWithRangesByPrimaryKeyResult split_ranges_result = splitPartsWithRangesByPrimaryKey( metadata_for_reading->getPrimaryKey(), @@ -848,16 +848,16 @@ Pipe ReadFromMergeTree::spreadMarkRangesAmongStreams(RangesInDataParts && parts_ info.use_uncompressed_cache); } -static ActionsDAGPtr createProjection(const Block & header) +static ActionsDAG createProjection(const Block & header) { - return std::make_unique(header.getNamesAndTypesList()); + return ActionsDAG(header.getNamesAndTypesList()); } Pipe ReadFromMergeTree::spreadMarkRangesAmongStreamsWithOrder( RangesInDataParts && parts_with_ranges, size_t num_streams, const Names & column_names, - ActionsDAGPtr & out_projection, + std::optional & out_projection, const InputOrderInfoPtr & input_order_info) { const auto & settings = context->getSettingsRef(); @@ -1171,7 +1171,7 @@ bool ReadFromMergeTree::doNotMergePartsAcrossPartitionsFinal() const } Pipe ReadFromMergeTree::spreadMarkRangesAmongStreamsFinal( - RangesInDataParts && parts_with_ranges, size_t num_streams, const Names & origin_column_names, const Names & column_names, ActionsDAGPtr & out_projection) + RangesInDataParts && parts_with_ranges, size_t num_streams, const Names & origin_column_names, const Names & column_names, std::optional & out_projection) { const auto & settings = context->getSettingsRef(); const auto & data_settings = data.getSettings(); @@ -1212,7 +1212,7 @@ Pipe ReadFromMergeTree::spreadMarkRangesAmongStreamsFinal( /// we will store lonely parts with level > 0 to use parallel select on them. RangesInDataParts non_intersecting_parts_by_primary_key; - auto sorting_expr = std::make_shared(ActionsDAG::clone(&metadata_for_reading->getSortingKey().expression->getActionsDAG())); + auto sorting_expr = std::make_shared(std::move(*ActionsDAG::clone(&metadata_for_reading->getSortingKey().expression->getActionsDAG()))); if (prewhere_info) { @@ -1333,7 +1333,7 @@ Pipe ReadFromMergeTree::spreadMarkRangesAmongStreamsFinal( if (!merging_pipes.empty() && !no_merging_pipes.empty()) { - out_projection = nullptr; /// We do projection here + out_projection = {}; /// We do projection here Pipes pipes; pipes.resize(2); pipes[0] = Pipe::unitePipes(std::move(merging_pipes)); @@ -1519,7 +1519,8 @@ void ReadFromMergeTree::applyFilters(ActionDAGNodes added_filter_nodes) /// (1) SourceStepWithFilter::filter_nodes, (2) query_info.filter_actions_dag. Make sure there are consistent. /// TODO: Get rid of filter_actions_dag in query_info after we move analysis of /// parallel replicas and unused shards into optimization, similar to projection analysis. 
- query_info.filter_actions_dag = std::move(filter_actions_dag); + if (filter_actions_dag) + query_info.filter_actions_dag = std::make_shared(std::move(*filter_actions_dag)); buildIndexes( indexes, @@ -1833,7 +1834,7 @@ bool ReadFromMergeTree::isQueryWithSampling() const } Pipe ReadFromMergeTree::spreadMarkRanges( - RangesInDataParts && parts_with_ranges, size_t num_streams, AnalysisResult & result, ActionsDAGPtr & result_projection) + RangesInDataParts && parts_with_ranges, size_t num_streams, AnalysisResult & result, std::optional & result_projection) { const bool final = isQueryWithFinal(); Names column_names_to_read = result.column_names_to_read; @@ -1894,7 +1895,7 @@ Pipe ReadFromMergeTree::spreadMarkRanges( } } -Pipe ReadFromMergeTree::groupStreamsByPartition(AnalysisResult & result, ActionsDAGPtr & result_projection) +Pipe ReadFromMergeTree::groupStreamsByPartition(AnalysisResult & result, std::optional & result_projection) { auto && parts_with_ranges = std::move(result.parts_with_ranges); @@ -1983,7 +1984,7 @@ void ReadFromMergeTree::initializePipeline(QueryPipelineBuilder & pipeline, cons /// Projection, that needed to drop columns, which have appeared by execution /// of some extra expressions, and to allow execute the same expressions later. /// NOTE: It may lead to double computation of expressions. - ActionsDAGPtr result_projection; + std::optional result_projection; Pipe pipe = output_each_partition_through_separate_port ? groupStreamsByPartition(result, result_projection) @@ -2000,7 +2001,7 @@ void ReadFromMergeTree::initializePipeline(QueryPipelineBuilder & pipeline, cons if (result.sampling.use_sampling) { - auto sampling_actions = std::make_shared(ActionsDAG::clone(result.sampling.filter_expression.get())); + auto sampling_actions = std::make_shared(std::move(*ActionsDAG::clone(result.sampling.filter_expression.get()))); pipe.addSimpleTransform([&](const Block & header) { return std::make_shared( @@ -2013,12 +2014,12 @@ void ReadFromMergeTree::initializePipeline(QueryPipelineBuilder & pipeline, cons Block cur_header = pipe.getHeader(); - auto append_actions = [&result_projection](ActionsDAGPtr actions) + auto append_actions = [&result_projection](ActionsDAG actions) { if (!result_projection) result_projection = std::move(actions); else - result_projection = ActionsDAG::merge(std::move(*result_projection), std::move(*actions)); + result_projection = ActionsDAG::merge(std::move(*result_projection), std::move(actions)); }; if (result_projection) @@ -2038,7 +2039,7 @@ void ReadFromMergeTree::initializePipeline(QueryPipelineBuilder & pipeline, cons if (result_projection) { - auto projection_actions = std::make_shared(ActionsDAG::clone(result_projection)); + auto projection_actions = std::make_shared(std::move(*result_projection)); pipe.addSimpleTransform([&](const Block & header) { return std::make_shared(header, projection_actions); @@ -2133,7 +2134,7 @@ void ReadFromMergeTree::describeActions(FormatSettings & format_settings) const format_settings.out << " (removed)"; format_settings.out << '\n'; - auto expression = std::make_shared(ActionsDAG::clone(prewhere_info->prewhere_actions)); + auto expression = std::make_shared(std::move(*ActionsDAG::clone(&*prewhere_info->prewhere_actions))); expression->describeActions(format_settings.out, prefix); } @@ -2142,7 +2143,7 @@ void ReadFromMergeTree::describeActions(FormatSettings & format_settings) const format_settings.out << prefix << "Row level filter" << '\n'; format_settings.out << prefix << "Row level filter column: " << 
prewhere_info->row_level_column_name << '\n'; - auto expression = std::make_shared(ActionsDAG::clone(prewhere_info->row_level_filter)); + auto expression = std::make_shared(std::move(*ActionsDAG::clone(&*prewhere_info->row_level_filter))); expression->describeActions(format_settings.out, prefix); } } @@ -2168,7 +2169,7 @@ void ReadFromMergeTree::describeActions(JSONBuilder::JSONMap & map) const std::unique_ptr prewhere_filter_map = std::make_unique(); prewhere_filter_map->add("Prewhere filter column", prewhere_info->prewhere_column_name); prewhere_filter_map->add("Prewhere filter remove filter column", prewhere_info->remove_prewhere_column); - auto expression = std::make_shared(ActionsDAG::clone(prewhere_info->prewhere_actions)); + auto expression = std::make_shared(std::move(*ActionsDAG::clone(&*prewhere_info->prewhere_actions))); prewhere_filter_map->add("Prewhere filter expression", expression->toTree()); prewhere_info_map->add("Prewhere filter", std::move(prewhere_filter_map)); @@ -2178,7 +2179,7 @@ void ReadFromMergeTree::describeActions(JSONBuilder::JSONMap & map) const { std::unique_ptr row_level_filter_map = std::make_unique(); row_level_filter_map->add("Row level filter column", prewhere_info->row_level_column_name); - auto expression = std::make_shared(ActionsDAG::clone(prewhere_info->row_level_filter)); + auto expression = std::make_shared(std::move(*ActionsDAG::clone(&*prewhere_info->row_level_filter))); row_level_filter_map->add("Row level filter expression", expression->toTree()); prewhere_info_map->add("Row level filter", std::move(row_level_filter_map)); diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.h b/src/Processors/QueryPlan/ReadFromMergeTree.h index e32507e1f22..a12f53924c3 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.h +++ b/src/Processors/QueryPlan/ReadFromMergeTree.h @@ -243,9 +243,9 @@ private: Pipe readFromPoolParallelReplicas(RangesInDataParts parts_with_range, Names required_columns, PoolSettings pool_settings); Pipe readInOrder(RangesInDataParts parts_with_ranges, Names required_columns, PoolSettings pool_settings, ReadType read_type, UInt64 limit); - Pipe spreadMarkRanges(RangesInDataParts && parts_with_ranges, size_t num_streams, AnalysisResult & result, ActionsDAGPtr & result_projection); + Pipe spreadMarkRanges(RangesInDataParts && parts_with_ranges, size_t num_streams, AnalysisResult & result, std::optional & result_projection); - Pipe groupStreamsByPartition(AnalysisResult & result, ActionsDAGPtr & result_projection); + Pipe groupStreamsByPartition(AnalysisResult & result, std::optional & result_projection); Pipe spreadMarkRangesAmongStreams(RangesInDataParts && parts_with_ranges, size_t num_streams, const Names & column_names); @@ -253,13 +253,13 @@ private: RangesInDataParts && parts_with_ranges, size_t num_streams, const Names & column_names, - ActionsDAGPtr & out_projection, + std::optional & out_projection, const InputOrderInfoPtr & input_order_info); bool doNotMergePartsAcrossPartitionsFinal() const; Pipe spreadMarkRangesAmongStreamsFinal( - RangesInDataParts && parts, size_t num_streams, const Names & origin_column_names, const Names & column_names, ActionsDAGPtr & out_projection); + RangesInDataParts && parts, size_t num_streams, const Names & origin_column_names, const Names & column_names, std::optional & out_projection); ReadFromMergeTree::AnalysisResult getAnalysisResult() const; diff --git a/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp b/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp index 
90fe609a17d..ca98f7c2110 100644 --- a/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp +++ b/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp @@ -441,7 +441,7 @@ Pipe ReadFromSystemNumbersStep::makePipe() chassert(numbers_storage.step != UInt64{0}); /// Build rpn of query filters - KeyCondition condition(filter_actions_dag.get(), context, column_names, key_expression); + KeyCondition condition(filter_actions_dag ? &*filter_actions_dag : nullptr, context, column_names, key_expression); if (condition.extractPlainRanges(ranges)) { diff --git a/src/Processors/QueryPlan/SourceStepWithFilter.cpp b/src/Processors/QueryPlan/SourceStepWithFilter.cpp index 79b225e7f93..55c9b5e442e 100644 --- a/src/Processors/QueryPlan/SourceStepWithFilter.cpp +++ b/src/Processors/QueryPlan/SourceStepWithFilter.cpp @@ -110,7 +110,7 @@ void SourceStepWithFilter::describeActions(FormatSettings & format_settings) con format_settings.out << " (removed)"; format_settings.out << '\n'; - auto expression = std::make_shared(ActionsDAG::clone(prewhere_info->prewhere_actions)); + auto expression = std::make_shared(std::move(*ActionsDAG::clone(&*prewhere_info->prewhere_actions))); expression->describeActions(format_settings.out, prefix); } @@ -119,7 +119,7 @@ void SourceStepWithFilter::describeActions(FormatSettings & format_settings) con format_settings.out << prefix << "Row level filter" << '\n'; format_settings.out << prefix << "Row level filter column: " << prewhere_info->row_level_column_name << '\n'; - auto expression = std::make_shared(ActionsDAG::clone(prewhere_info->row_level_filter)); + auto expression = std::make_shared(std::move(*ActionsDAG::clone(&*prewhere_info->row_level_filter))); expression->describeActions(format_settings.out, prefix); } } @@ -137,7 +137,7 @@ void SourceStepWithFilter::describeActions(JSONBuilder::JSONMap & map) const std::unique_ptr prewhere_filter_map = std::make_unique(); prewhere_filter_map->add("Prewhere filter column", prewhere_info->prewhere_column_name); prewhere_filter_map->add("Prewhere filter remove filter column", prewhere_info->remove_prewhere_column); - auto expression = std::make_shared(ActionsDAG::clone(prewhere_info->prewhere_actions)); + auto expression = std::make_shared(std::move(*ActionsDAG::clone(&*prewhere_info->prewhere_actions))); prewhere_filter_map->add("Prewhere filter expression", expression->toTree()); prewhere_info_map->add("Prewhere filter", std::move(prewhere_filter_map)); @@ -147,7 +147,7 @@ void SourceStepWithFilter::describeActions(JSONBuilder::JSONMap & map) const { std::unique_ptr row_level_filter_map = std::make_unique(); row_level_filter_map->add("Row level filter column", prewhere_info->row_level_column_name); - auto expression = std::make_shared(ActionsDAG::clone(prewhere_info->row_level_filter)); + auto expression = std::make_shared(std::move(*ActionsDAG::clone(&*prewhere_info->row_level_filter))); row_level_filter_map->add("Row level filter expression", expression->toTree()); prewhere_info_map->add("Row level filter", std::move(row_level_filter_map)); diff --git a/src/Processors/QueryPlan/SourceStepWithFilter.h b/src/Processors/QueryPlan/SourceStepWithFilter.h index 91b62efa860..f7a030c0628 100644 --- a/src/Processors/QueryPlan/SourceStepWithFilter.h +++ b/src/Processors/QueryPlan/SourceStepWithFilter.h @@ -33,8 +33,8 @@ public: { } - const ActionsDAGPtr & getFilterActionsDAG() const { return filter_actions_dag; } - ActionsDAGPtr detachFilterActionsDAG() { return std::move(filter_actions_dag); } + const std::optional & 
getFilterActionsDAG() const { return filter_actions_dag; } + std::optional detachFilterActionsDAG() { return std::move(filter_actions_dag); } const SelectQueryInfo & getQueryInfo() const { return query_info; } const PrewhereInfoPtr & getPrewhereInfo() const { return prewhere_info; } @@ -81,7 +81,7 @@ protected: ContextPtr context; std::optional limit; - ActionsDAGPtr filter_actions_dag; + std::optional filter_actions_dag; private: /// Will be cleared after applyFilters() is called. diff --git a/src/Processors/QueryPlan/TotalsHavingStep.cpp b/src/Processors/QueryPlan/TotalsHavingStep.cpp index 19632b1862f..4aa4f10ac86 100644 --- a/src/Processors/QueryPlan/TotalsHavingStep.cpp +++ b/src/Processors/QueryPlan/TotalsHavingStep.cpp @@ -28,7 +28,7 @@ TotalsHavingStep::TotalsHavingStep( const DataStream & input_stream_, const AggregateDescriptions & aggregates_, bool overflow_row_, - const ActionsDAGPtr & actions_dag_, + std::optional actions_dag_, const std::string & filter_column_, bool remove_filter_, TotalsMode totals_mode_, @@ -38,7 +38,7 @@ TotalsHavingStep::TotalsHavingStep( input_stream_, TotalsHavingTransform::transformHeader( input_stream_.header, - actions_dag_.get(), + actions_dag_ ? &*actions_dag_ : nullptr, filter_column_, remove_filter_, final_, @@ -46,7 +46,7 @@ TotalsHavingStep::TotalsHavingStep( getTraits(!filter_column_.empty())) , aggregates(aggregates_) , overflow_row(overflow_row_) - , actions_dag(ActionsDAG::clone(actions_dag_)) + , actions_dag(std::move(actions_dag_)) , filter_column_name(filter_column_) , remove_filter(remove_filter_) , totals_mode(totals_mode_) @@ -57,7 +57,7 @@ TotalsHavingStep::TotalsHavingStep( void TotalsHavingStep::transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings & settings) { - auto expression_actions = actions_dag ? std::make_shared(ActionsDAG::clone(actions_dag), settings.getActionsSettings()) : nullptr; + auto expression_actions = actions_dag ? std::make_shared(std::move(*actions_dag), settings.getActionsSettings()) : nullptr; auto totals_having = std::make_shared( pipeline.getHeader(), @@ -100,7 +100,7 @@ void TotalsHavingStep::describeActions(FormatSettings & settings) const if (actions_dag) { bool first = true; - auto expression = std::make_shared(ActionsDAG::clone(actions_dag)); + auto expression = std::make_shared(std::move(*ActionsDAG::clone(getActions()))); for (const auto & action : expression->getActions()) { settings.out << prefix << (first ? 
"Actions: " @@ -117,7 +117,7 @@ void TotalsHavingStep::describeActions(JSONBuilder::JSONMap & map) const if (actions_dag) { map.add("Filter column", filter_column_name); - auto expression = std::make_shared(ActionsDAG::clone(actions_dag)); + auto expression = std::make_shared(std::move(*ActionsDAG::clone(getActions()))); map.add("Expression", expression->toTree()); } } @@ -128,7 +128,7 @@ void TotalsHavingStep::updateOutputStream() input_streams.front(), TotalsHavingTransform::transformHeader( input_streams.front().header, - actions_dag.get(), + getActions(), filter_column_name, remove_filter, final, diff --git a/src/Processors/QueryPlan/TotalsHavingStep.h b/src/Processors/QueryPlan/TotalsHavingStep.h index 52ef5437701..927b8d99de3 100644 --- a/src/Processors/QueryPlan/TotalsHavingStep.h +++ b/src/Processors/QueryPlan/TotalsHavingStep.h @@ -1,6 +1,7 @@ #pragma once #include #include +#include namespace DB { @@ -18,7 +19,7 @@ public: const DataStream & input_stream_, const AggregateDescriptions & aggregates_, bool overflow_row_, - const ActionsDAGPtr & actions_dag_, + std::optional actions_dag_, const std::string & filter_column_, bool remove_filter_, TotalsMode totals_mode_, @@ -32,7 +33,7 @@ public: void describeActions(JSONBuilder::JSONMap & map) const override; void describeActions(FormatSettings & settings) const override; - const ActionsDAGPtr & getActions() const { return actions_dag; } + const ActionsDAG * getActions() const { return actions_dag ? &*actions_dag : nullptr; } private: void updateOutputStream() override; @@ -40,7 +41,7 @@ private: const AggregateDescriptions aggregates; bool overflow_row; - ActionsDAGPtr actions_dag; + std::optional actions_dag; String filter_column_name; bool remove_filter; TotalsMode totals_mode; diff --git a/src/Processors/SourceWithKeyCondition.h b/src/Processors/SourceWithKeyCondition.h index fcf576637ff..cfd3eb236b7 100644 --- a/src/Processors/SourceWithKeyCondition.h +++ b/src/Processors/SourceWithKeyCondition.h @@ -16,13 +16,13 @@ protected: /// Represents pushed down filters in source std::shared_ptr key_condition; - void setKeyConditionImpl(const ActionsDAG * filter_actions_dag, ContextPtr context, const Block & keys) + void setKeyConditionImpl(const std::optional & filter_actions_dag, ContextPtr context, const Block & keys) { key_condition = std::make_shared( - filter_actions_dag, + filter_actions_dag ? &*filter_actions_dag : nullptr, context, keys.getNames(), - std::make_shared(std::make_unique(keys.getColumnsWithTypeAndName()))); + std::make_shared(ActionsDAG(keys.getColumnsWithTypeAndName()))); } public: @@ -33,6 +33,6 @@ public: virtual void setKeyCondition(const std::shared_ptr & key_condition_) { key_condition = key_condition_; } /// Set key_condition created by filter_actions_dag and context. 
- virtual void setKeyCondition(const ActionsDAGPtr & /*filter_actions_dag*/, ContextPtr /*context*/) { } + virtual void setKeyCondition(const std::optional & /*filter_actions_dag*/, ContextPtr /*context*/) { } }; } diff --git a/src/Processors/Transforms/AddingDefaultsTransform.cpp b/src/Processors/Transforms/AddingDefaultsTransform.cpp index 7945b3999c1..da4d3a0041b 100644 --- a/src/Processors/Transforms/AddingDefaultsTransform.cpp +++ b/src/Processors/Transforms/AddingDefaultsTransform.cpp @@ -178,7 +178,7 @@ void AddingDefaultsTransform::transform(Chunk & chunk) auto dag = evaluateMissingDefaults(evaluate_block, header.getNamesAndTypesList(), columns, context, false); if (dag) { - auto actions = std::make_shared(std::move(dag), ExpressionActionsSettings::fromContext(context, CompileExpressions::yes), true); + auto actions = std::make_shared(std::move(*dag), ExpressionActionsSettings::fromContext(context, CompileExpressions::yes), true); actions->execute(evaluate_block); } diff --git a/src/Processors/Transforms/FillingTransform.cpp b/src/Processors/Transforms/FillingTransform.cpp index bbe57fc6441..36ffc515f43 100644 --- a/src/Processors/Transforms/FillingTransform.cpp +++ b/src/Processors/Transforms/FillingTransform.cpp @@ -203,7 +203,7 @@ FillingTransform::FillingTransform( , use_with_fill_by_sorting_prefix(use_with_fill_by_sorting_prefix_) { if (interpolate_description) - interpolate_actions = std::make_shared(ActionsDAG::clone(interpolate_description->actions)); + interpolate_actions = std::make_shared(std::move(*ActionsDAG::clone(&interpolate_description->actions))); std::vector is_fill_column(header_.columns()); for (size_t i = 0, size = fill_description.size(); i < size; ++i) diff --git a/src/Storages/Hive/StorageHive.cpp b/src/Storages/Hive/StorageHive.cpp index 2cd51259549..da5a45f36d5 100644 --- a/src/Storages/Hive/StorageHive.cpp +++ b/src/Storages/Hive/StorageHive.cpp @@ -516,7 +516,7 @@ void StorageHive::initMinMaxIndexExpression() partition_names = partition_name_types.getNames(); partition_types = partition_name_types.getTypes(); partition_minmax_idx_expr = std::make_shared( - std::make_unique(partition_name_types), ExpressionActionsSettings::fromContext(getContext())); + ActionsDAG(partition_name_types), ExpressionActionsSettings::fromContext(getContext())); } NamesAndTypesList all_name_types = metadata_snapshot->getColumns().getAllPhysical(); @@ -526,7 +526,7 @@ void StorageHive::initMinMaxIndexExpression() hivefile_name_types.push_back(column); } hivefile_minmax_idx_expr = std::make_shared( - std::make_unique(hivefile_name_types), ExpressionActionsSettings::fromContext(getContext())); + ActionsDAG(hivefile_name_types), ExpressionActionsSettings::fromContext(getContext())); } ASTPtr StorageHive::extractKeyExpressionList(const ASTPtr & node) @@ -583,7 +583,7 @@ static HiveFilePtr createHiveFile( HiveFiles StorageHive::collectHiveFilesFromPartition( const Apache::Hadoop::Hive::Partition & partition, - const ActionsDAGPtr & filter_actions_dag, + const ActionsDAG * filter_actions_dag, const HiveTableMetadataPtr & hive_table_metadata, const HDFSFSPtr & fs, const ContextPtr & context_, @@ -647,7 +647,7 @@ HiveFiles StorageHive::collectHiveFilesFromPartition( for (size_t i = 0; i < partition_names.size(); ++i) ranges.emplace_back(fields[i]); - const KeyCondition partition_key_condition(filter_actions_dag.get(), getContext(), partition_names, partition_minmax_idx_expr); + const KeyCondition partition_key_condition(filter_actions_dag, getContext(), partition_names, 
partition_minmax_idx_expr); if (!partition_key_condition.checkInHyperrectangle(ranges, partition_types).can_be_true) return {}; } @@ -681,7 +681,7 @@ StorageHive::listDirectory(const String & path, const HiveTableMetadataPtr & hiv HiveFilePtr StorageHive::getHiveFileIfNeeded( const FileInfo & file_info, const FieldVector & fields, - const ActionsDAGPtr & filter_actions_dag, + const ActionsDAG * filter_actions_dag, const HiveTableMetadataPtr & hive_table_metadata, const ContextPtr & context_, PruneLevel prune_level) const @@ -715,7 +715,7 @@ HiveFilePtr StorageHive::getHiveFileIfNeeded( if (prune_level >= PruneLevel::File) { - const KeyCondition hivefile_key_condition(filter_actions_dag.get(), getContext(), hivefile_name_types.getNames(), hivefile_minmax_idx_expr); + const KeyCondition hivefile_key_condition(filter_actions_dag, getContext(), hivefile_name_types.getNames(), hivefile_minmax_idx_expr); if (hive_file->useFileMinMaxIndex()) { /// Load file level minmax index and apply @@ -828,7 +828,7 @@ void ReadFromHive::createFiles() if (hive_files) return; - hive_files = storage->collectHiveFiles(num_streams, filter_actions_dag, hive_table_metadata, fs, context); + hive_files = storage->collectHiveFiles(num_streams, filter_actions_dag ? &*filter_actions_dag : nullptr, hive_table_metadata, fs, context); LOG_INFO(log, "Collect {} hive files to read", hive_files->size()); } @@ -950,7 +950,7 @@ void ReadFromHive::initializePipeline(QueryPipelineBuilder & pipeline, const Bui HiveFiles StorageHive::collectHiveFiles( size_t max_threads, - const ActionsDAGPtr & filter_actions_dag, + const ActionsDAG * filter_actions_dag, const HiveTableMetadataPtr & hive_table_metadata, const HDFSFSPtr & fs, const ContextPtr & context_, @@ -1023,12 +1023,12 @@ SinkToStoragePtr StorageHive::write(const ASTPtr & /*query*/, const StorageMetad std::optional StorageHive::totalRows(const Settings & settings) const { /// query_info is not used when prune_level == PruneLevel::None - return totalRowsImpl(settings, nullptr, getContext(), PruneLevel::None); + return totalRowsImpl(settings, {}, getContext(), PruneLevel::None); } -std::optional StorageHive::totalRowsByPartitionPredicate(const ActionsDAGPtr & filter_actions_dag, ContextPtr context_) const +std::optional StorageHive::totalRowsByPartitionPredicate(const ActionsDAG & filter_actions_dag, ContextPtr context_) const { - return totalRowsImpl(context_->getSettingsRef(), filter_actions_dag, context_, PruneLevel::Partition); + return totalRowsImpl(context_->getSettingsRef(), &filter_actions_dag, context_, PruneLevel::Partition); } void StorageHive::checkAlterIsPossible(const AlterCommands & commands, ContextPtr /*local_context*/) const @@ -1043,7 +1043,7 @@ void StorageHive::checkAlterIsPossible(const AlterCommands & commands, ContextPt } std::optional -StorageHive::totalRowsImpl(const Settings & settings, const ActionsDAGPtr & filter_actions_dag, ContextPtr context_, PruneLevel prune_level) const +StorageHive::totalRowsImpl(const Settings & settings, const ActionsDAG * filter_actions_dag, ContextPtr context_, PruneLevel prune_level) const { /// Row-based format like Text doesn't support totalRowsByPartitionPredicate if (!supportsSubsetOfColumns()) diff --git a/src/Storages/Hive/StorageHive.h b/src/Storages/Hive/StorageHive.h index 8a457dd6e01..e16df22e138 100644 --- a/src/Storages/Hive/StorageHive.h +++ b/src/Storages/Hive/StorageHive.h @@ -57,7 +57,7 @@ public: bool supportsSubsetOfColumns() const; std::optional totalRows(const Settings & settings) const override; - 
std::optional totalRowsByPartitionPredicate(const ActionsDAGPtr & filter_actions_dag, ContextPtr context_) const override; + std::optional totalRowsByPartitionPredicate(const ActionsDAG & filter_actions_dag, ContextPtr context_) const override; void checkAlterIsPossible(const AlterCommands & commands, ContextPtr local_context) const override; protected: @@ -90,7 +90,7 @@ private: HiveFiles collectHiveFiles( size_t max_threads, - const ActionsDAGPtr & filter_actions_dag, + const ActionsDAG * filter_actions_dag, const HiveTableMetadataPtr & hive_table_metadata, const HDFSFSPtr & fs, const ContextPtr & context_, @@ -98,7 +98,7 @@ private: HiveFiles collectHiveFilesFromPartition( const Apache::Hadoop::Hive::Partition & partition, - const ActionsDAGPtr & filter_actions_dag, + const ActionsDAG * filter_actions_dag, const HiveTableMetadataPtr & hive_table_metadata, const HDFSFSPtr & fs, const ContextPtr & context_, @@ -107,7 +107,7 @@ private: HiveFilePtr getHiveFileIfNeeded( const FileInfo & file_info, const FieldVector & fields, - const ActionsDAGPtr & filter_actions_dag, + const ActionsDAG * filter_actions_dag, const HiveTableMetadataPtr & hive_table_metadata, const ContextPtr & context_, PruneLevel prune_level = PruneLevel::Max) const; @@ -115,7 +115,7 @@ private: void lazyInitialize(); std::optional - totalRowsImpl(const Settings & settings, const ActionsDAGPtr & filter_actions_dag, ContextPtr context_, PruneLevel prune_level) const; + totalRowsImpl(const Settings & settings, const ActionsDAG * filter_actions_dag, ContextPtr context_, PruneLevel prune_level) const; String hive_metastore_url; diff --git a/src/Storages/IStorage.cpp b/src/Storages/IStorage.cpp index 1f7ac23eb82..57f79a2cd7f 100644 --- a/src/Storages/IStorage.cpp +++ b/src/Storages/IStorage.cpp @@ -236,7 +236,7 @@ StorageID IStorage::getStorageID() const return storage_id; } -ConditionSelectivityEstimator IStorage::getConditionSelectivityEstimatorByPredicate(const StorageSnapshotPtr &, const ActionsDAGPtr &, ContextPtr) const +ConditionSelectivityEstimator IStorage::getConditionSelectivityEstimatorByPredicate(const StorageSnapshotPtr &, const ActionsDAG *, ContextPtr) const { return {}; } diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index 98afd844046..c86f18d5d3b 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -135,7 +135,7 @@ public: /// Returns true if the storage supports queries with the PREWHERE section. virtual bool supportsPrewhere() const { return false; } - virtual ConditionSelectivityEstimator getConditionSelectivityEstimatorByPredicate(const StorageSnapshotPtr &, const ActionsDAGPtr &, ContextPtr) const; + virtual ConditionSelectivityEstimator getConditionSelectivityEstimatorByPredicate(const StorageSnapshotPtr &, const ActionsDAG *, ContextPtr) const; /// Returns which columns supports PREWHERE, or empty std::nullopt if all columns is supported. /// This is needed for engines whose aggregates data from multiple tables, like Merge. @@ -682,7 +682,7 @@ public: virtual std::optional totalRows(const Settings &) const { return {}; } /// Same as above but also take partition predicate into account. 
- virtual std::optional totalRowsByPartitionPredicate(const ActionsDAGPtr &, ContextPtr) const { return {}; } + virtual std::optional totalRowsByPartitionPredicate(const ActionsDAG &, ContextPtr) const { return {}; } /// If it is possible to quickly determine exact number of bytes for the table on storage: /// - memory (approximated, resident) diff --git a/src/Storages/KVStorageUtils.cpp b/src/Storages/KVStorageUtils.cpp index 94319aef3b8..88783246e10 100644 --- a/src/Storages/KVStorageUtils.cpp +++ b/src/Storages/KVStorageUtils.cpp @@ -231,7 +231,7 @@ bool traverseDAGFilter( } std::pair getFilterKeys( - const String & primary_key, const DataTypePtr & primary_key_type, const ActionsDAGPtr & filter_actions_dag, const ContextPtr & context) + const String & primary_key, const DataTypePtr & primary_key_type, const std::optional & filter_actions_dag, const ContextPtr & context) { if (!filter_actions_dag) return {{}, true}; diff --git a/src/Storages/KVStorageUtils.h b/src/Storages/KVStorageUtils.h index e20a1ce4f37..64108290270 100644 --- a/src/Storages/KVStorageUtils.h +++ b/src/Storages/KVStorageUtils.h @@ -22,7 +22,7 @@ std::pair getFilterKeys( const std::string & primary_key, const DataTypePtr & primary_key_type, const SelectQueryInfo & query_info, const ContextPtr & context); std::pair getFilterKeys( - const String & primary_key, const DataTypePtr & primary_key_type, const ActionsDAGPtr & filter_actions_dag, const ContextPtr & context); + const String & primary_key, const DataTypePtr & primary_key_type, const std::optional & filter_actions_dag, const ContextPtr & context); template void fillColumns(const K & key, const V & value, size_t key_pos, const Block & header, MutableColumns & columns) diff --git a/src/Storages/KeyDescription.cpp b/src/Storages/KeyDescription.cpp index e03ecc05064..7e43966556e 100644 --- a/src/Storages/KeyDescription.cpp +++ b/src/Storages/KeyDescription.cpp @@ -160,7 +160,7 @@ KeyDescription KeyDescription::buildEmptyKey() { KeyDescription result; result.expression_list_ast = std::make_shared(); - result.expression = std::make_shared(std::make_unique(), ExpressionActionsSettings{}); + result.expression = std::make_shared(ActionsDAG(), ExpressionActionsSettings{}); return result; } diff --git a/src/Storages/MergeTree/IMergeTreeReader.cpp b/src/Storages/MergeTree/IMergeTreeReader.cpp index 4ad7f6ef991..264b2b397f4 100644 --- a/src/Storages/MergeTree/IMergeTreeReader.cpp +++ b/src/Storages/MergeTree/IMergeTreeReader.cpp @@ -163,8 +163,8 @@ void IMergeTreeReader::evaluateMissingDefaults(Block additional_columns, Columns if (dag) { dag->addMaterializingOutputActions(); - auto actions = std::make_shared< - ExpressionActions>(std::move(dag), + auto actions = std::make_shared( + std::move(*dag), ExpressionActionsSettings::fromSettings(data_part_info_for_read->getContext()->getSettingsRef())); actions->execute(additional_columns); } diff --git a/src/Storages/MergeTree/KeyCondition.cpp b/src/Storages/MergeTree/KeyCondition.cpp index 1efded3b064..d781345d834 100644 --- a/src/Storages/MergeTree/KeyCondition.cpp +++ b/src/Storages/MergeTree/KeyCondition.cpp @@ -628,7 +628,7 @@ static const ActionsDAG::Node & cloneASTWithInversionPushDown( if (const auto * index_hint = typeid_cast(adaptor->getFunction().get())) { const auto & index_hint_dag = index_hint->getActions(); - children = index_hint_dag->getOutputs(); + children = index_hint_dag.getOutputs(); for (auto & arg : children) arg = &cloneASTWithInversionPushDown(*arg, inverted_dag, to_inverted, context, need_inversion); @@ 
-729,7 +729,7 @@ Block KeyCondition::getBlockWithConstants( if (syntax_analyzer_result) { auto actions = ExpressionAnalyzer(query, syntax_analyzer_result, context).getConstActionsDAG(); - for (const auto & action_node : actions->getOutputs()) + for (const auto & action_node : actions.getOutputs()) { if (action_node->column) result.insert(ColumnWithTypeAndName{action_node->column, action_node->result_type, action_node->result_name}); diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 7b642c34f37..334c8c9c5ac 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -472,7 +472,7 @@ StoragePolicyPtr MergeTreeData::getStoragePolicy() const } ConditionSelectivityEstimator MergeTreeData::getConditionSelectivityEstimatorByPredicate( - const StorageSnapshotPtr & storage_snapshot, const ActionsDAGPtr & filter_dag, ContextPtr local_context) const + const StorageSnapshotPtr & storage_snapshot, const ActionsDAG * filter_dag, ContextPtr local_context) const { if (!local_context->getSettings().allow_statistics_optimize) return {}; @@ -487,7 +487,7 @@ ConditionSelectivityEstimator MergeTreeData::getConditionSelectivityEstimatorByP ASTPtr expression_ast; ConditionSelectivityEstimator result; - PartitionPruner partition_pruner(storage_snapshot->metadata, filter_dag.get(), local_context); + PartitionPruner partition_pruner(storage_snapshot->metadata, filter_dag, local_context); if (partition_pruner.isUseless()) { @@ -746,7 +746,7 @@ ExpressionActionsPtr MergeTreeData::getMinMaxExpr(const KeyDescription & partiti if (!partition_key.column_names.empty()) partition_key_columns = partition_key.expression->getRequiredColumnsWithTypes(); - return std::make_shared(std::make_unique(partition_key_columns), settings); + return std::make_shared(ActionsDAG(partition_key_columns), settings); } Names MergeTreeData::getMinMaxColumnsNames(const KeyDescription & partition_key) @@ -1134,7 +1134,7 @@ Block MergeTreeData::getBlockWithVirtualsForFilter( std::optional MergeTreeData::totalRowsByPartitionPredicateImpl( - const ActionsDAGPtr & filter_actions_dag, ContextPtr local_context, const DataPartsVector & parts) const + const ActionsDAG & filter_actions_dag, ContextPtr local_context, const DataPartsVector & parts) const { if (parts.empty()) return 0; @@ -1142,7 +1142,7 @@ std::optional MergeTreeData::totalRowsByPartitionPredicateImpl( auto metadata_snapshot = getInMemoryMetadataPtr(); auto virtual_columns_block = getBlockWithVirtualsForFilter(metadata_snapshot, {parts[0]}); - auto filter_dag = VirtualColumnUtils::splitFilterDagForAllowedInputs(filter_actions_dag->getOutputs().at(0), nullptr); + auto filter_dag = VirtualColumnUtils::splitFilterDagForAllowedInputs(filter_actions_dag.getOutputs().at(0), nullptr); if (!filter_dag) return {}; diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index 52916d85fef..e490e4b0bf9 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -426,7 +426,7 @@ public: bool supportsPrewhere() const override { return true; } - ConditionSelectivityEstimator getConditionSelectivityEstimatorByPredicate(const StorageSnapshotPtr &, const ActionsDAGPtr &, ContextPtr) const override; + ConditionSelectivityEstimator getConditionSelectivityEstimatorByPredicate(const StorageSnapshotPtr &, const ActionsDAG *, ContextPtr) const override; bool supportsFinal() const override; @@ -1227,7 +1227,7 @@ protected: boost::iterator_range 
range, const ColumnsDescription & storage_columns); std::optional totalRowsByPartitionPredicateImpl( - const ActionsDAGPtr & filter_actions_dag, ContextPtr context, const DataPartsVector & parts) const; + const ActionsDAG & filter_actions_dag, ContextPtr context, const DataPartsVector & parts) const; static decltype(auto) getStateModifier(DataPartState state) { diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index 61b8b6fdaa8..5a5b6d4a6e1 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -428,7 +428,7 @@ MergeTreeDataSelectSamplingData MergeTreeDataSelectExecutor::getSampling( ASTPtr query = sampling.filter_function; auto syntax_result = TreeRewriter(context).analyze(query, available_real_columns); - sampling.filter_expression = ExpressionAnalyzer(sampling.filter_function, syntax_result, context).getActionsDAG(false); + sampling.filter_expression = std::make_shared(ExpressionAnalyzer(sampling.filter_function, syntax_result, context).getActionsDAG(false)); } } @@ -466,7 +466,7 @@ void MergeTreeDataSelectExecutor::buildKeyConditionFromPartOffset( dag.get(), context, sample.getNames(), - std::make_shared(std::make_unique(sample.getColumnsWithTypeAndName()), ExpressionActionsSettings{}), + std::make_shared(ActionsDAG(sample.getColumnsWithTypeAndName()), ExpressionActionsSettings{}), {}}); } diff --git a/src/Storages/MergeTree/MergeTreeIndexSet.cpp b/src/Storages/MergeTree/MergeTreeIndexSet.cpp index 8d4ef69b1b9..ca31ffc9de5 100644 --- a/src/Storages/MergeTree/MergeTreeIndexSet.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexSet.cpp @@ -265,15 +265,15 @@ MergeTreeIndexConditionSet::MergeTreeIndexConditionSet( if (!set->buildOrderedSetInplace(context)) return; - auto filter_actions_dag = ActionsDAG::clone(filter_dag); - const auto * filter_actions_dag_node = filter_actions_dag->getOutputs().at(0); + auto filter_actions_dag = std::move(*ActionsDAG::clone(filter_dag)); + const auto * filter_actions_dag_node = filter_actions_dag.getOutputs().at(0); std::unordered_map node_to_result_node; - filter_actions_dag->getOutputs()[0] = &traverseDAG(*filter_actions_dag_node, filter_actions_dag, context, node_to_result_node); + filter_actions_dag.getOutputs()[0] = &traverseDAG(*filter_actions_dag_node, filter_actions_dag, context, node_to_result_node); - filter_actions_dag->removeUnusedActions(); + filter_actions_dag.removeUnusedActions(); - actions_output_column_name = filter_actions_dag->getOutputs().at(0)->result_name; + actions_output_column_name = filter_actions_dag.getOutputs().at(0)->result_name; actions = std::make_shared(std::move(filter_actions_dag)); } @@ -306,7 +306,7 @@ bool MergeTreeIndexConditionSet::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx } -static const ActionsDAG::NodeRawConstPtrs & getArguments(const ActionsDAG::Node & node, const ActionsDAGPtr & result_dag_or_null, ActionsDAG::NodeRawConstPtrs * storage) +static const ActionsDAG::NodeRawConstPtrs & getArguments(const ActionsDAG::Node & node, ActionsDAG * result_dag_or_null, ActionsDAG::NodeRawConstPtrs * storage) { chassert(node.type == ActionsDAG::ActionType::FUNCTION); if (node.function_base->getName() != "indexHint") @@ -316,17 +316,17 @@ static const ActionsDAG::NodeRawConstPtrs & getArguments(const ActionsDAG::Node const auto & adaptor = typeid_cast(*node.function_base); const auto & index_hint = typeid_cast(*adaptor.getFunction()); if (!result_dag_or_null) - return 
index_hint.getActions()->getOutputs(); + return index_hint.getActions().getOutputs(); /// Import the DAG and map argument pointers. - ActionsDAGPtr actions_clone = ActionsDAG::clone(index_hint.getActions()); + auto actions_clone = std::move(*ActionsDAG::clone(&index_hint.getActions())); chassert(storage); - result_dag_or_null->mergeNodes(std::move(*actions_clone), storage); + result_dag_or_null->mergeNodes(std::move(actions_clone), storage); return *storage; } const ActionsDAG::Node & MergeTreeIndexConditionSet::traverseDAG(const ActionsDAG::Node & node, - ActionsDAGPtr & result_dag, + ActionsDAG & result_dag, const ContextPtr & context, std::unordered_map & node_to_result_node) const { @@ -348,7 +348,7 @@ const ActionsDAG::Node & MergeTreeIndexConditionSet::traverseDAG(const ActionsDA atom_node_ptr->type == ActionsDAG::ActionType::FUNCTION) { auto bit_wrapper_function = FunctionFactory::instance().get("__bitWrapperFunc", context); - result_node = &result_dag->addFunction(bit_wrapper_function, {atom_node_ptr}, {}); + result_node = &result_dag.addFunction(bit_wrapper_function, {atom_node_ptr}, {}); } } else @@ -359,14 +359,14 @@ const ActionsDAG::Node & MergeTreeIndexConditionSet::traverseDAG(const ActionsDA unknown_field_column_with_type.type = std::make_shared(); unknown_field_column_with_type.column = unknown_field_column_with_type.type->createColumnConst(1, UNKNOWN_FIELD); - result_node = &result_dag->addColumn(unknown_field_column_with_type); + result_node = &result_dag.addColumn(unknown_field_column_with_type); } node_to_result_node.emplace(&node, result_node); return *result_node; } -const ActionsDAG::Node * MergeTreeIndexConditionSet::atomFromDAG(const ActionsDAG::Node & node, ActionsDAGPtr & result_dag, const ContextPtr & context) const +const ActionsDAG::Node * MergeTreeIndexConditionSet::atomFromDAG(const ActionsDAG::Node & node, ActionsDAG & result_dag, const ContextPtr & context) const { /// Function, literal or column @@ -386,7 +386,7 @@ const ActionsDAG::Node * MergeTreeIndexConditionSet::atomFromDAG(const ActionsDA const auto * result_node = node_to_check; if (node.type != ActionsDAG::ActionType::INPUT) - result_node = &result_dag->addInput(column_name, node.result_type); + result_node = &result_dag.addInput(column_name, node.result_type); return result_node; } @@ -407,11 +407,11 @@ const ActionsDAG::Node * MergeTreeIndexConditionSet::atomFromDAG(const ActionsDA return nullptr; } - return &result_dag->addFunction(node.function_base, children, {}); + return &result_dag.addFunction(node.function_base, children, {}); } const ActionsDAG::Node * MergeTreeIndexConditionSet::operatorFromDAG(const ActionsDAG::Node & node, - ActionsDAGPtr & result_dag, + ActionsDAG & result_dag, const ContextPtr & context, std::unordered_map & node_to_result_node) const { @@ -429,7 +429,7 @@ const ActionsDAG::Node * MergeTreeIndexConditionSet::operatorFromDAG(const Actio auto function_name = node_to_check->function->getName(); ActionsDAG::NodeRawConstPtrs temp_ptrs_to_argument; - const auto & arguments = getArguments(*node_to_check, result_dag, &temp_ptrs_to_argument); + const auto & arguments = getArguments(*node_to_check, &result_dag, &temp_ptrs_to_argument); size_t arguments_size = arguments.size(); if (function_name == "not") @@ -440,7 +440,7 @@ const ActionsDAG::Node * MergeTreeIndexConditionSet::operatorFromDAG(const Actio const ActionsDAG::Node * argument = &traverseDAG(*arguments[0], result_dag, context, node_to_result_node); auto bit_swap_last_two_function = 
FunctionFactory::instance().get("__bitSwapLastTwo", context); - return &result_dag->addFunction(bit_swap_last_two_function, {argument}, {}); + return &result_dag.addFunction(bit_swap_last_two_function, {argument}, {}); } else if (function_name == "and" || function_name == "indexHint" || function_name == "or") { @@ -468,7 +468,7 @@ const ActionsDAG::Node * MergeTreeIndexConditionSet::operatorFromDAG(const Actio const auto * before_last_argument = children.back(); children.pop_back(); - last_argument = &result_dag->addFunction(function, {before_last_argument, last_argument}, {}); + last_argument = &result_dag.addFunction(function, {before_last_argument, last_argument}, {}); } return last_argument; diff --git a/src/Storages/MergeTree/MergeTreeIndexSet.h b/src/Storages/MergeTree/MergeTreeIndexSet.h index abd40b3cf9d..03b02515e47 100644 --- a/src/Storages/MergeTree/MergeTreeIndexSet.h +++ b/src/Storages/MergeTree/MergeTreeIndexSet.h @@ -93,16 +93,16 @@ public: ~MergeTreeIndexConditionSet() override = default; private: const ActionsDAG::Node & traverseDAG(const ActionsDAG::Node & node, - ActionsDAGPtr & result_dag, + ActionsDAG & result_dag, const ContextPtr & context, std::unordered_map & node_to_result_node) const; const ActionsDAG::Node * atomFromDAG(const ActionsDAG::Node & node, - ActionsDAGPtr & result_dag, + ActionsDAG & result_dag, const ContextPtr & context) const; const ActionsDAG::Node * operatorFromDAG(const ActionsDAG::Node & node, - ActionsDAGPtr & result_dag, + ActionsDAG & result_dag, const ContextPtr & context, std::unordered_map & node_to_result_node) const; diff --git a/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp b/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp index e924f853524..aec2f988e8d 100644 --- a/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp +++ b/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp @@ -80,7 +80,7 @@ PrewhereExprInfo MergeTreeSelectProcessor::getPrewhereActions(PrewhereInfoPtr pr PrewhereExprStep row_level_filter_step { .type = PrewhereExprStep::Filter, - .actions = std::make_shared(ActionsDAG::clone(prewhere_info->row_level_filter), actions_settings), + .actions = std::make_shared(std::move(*ActionsDAG::clone(&*prewhere_info->row_level_filter)), actions_settings), .filter_column_name = prewhere_info->row_level_column_name, .remove_filter_column = true, .need_filter = true, @@ -96,7 +96,7 @@ PrewhereExprInfo MergeTreeSelectProcessor::getPrewhereActions(PrewhereInfoPtr pr PrewhereExprStep prewhere_step { .type = PrewhereExprStep::Filter, - .actions = std::make_shared(ActionsDAG::clone(prewhere_info->prewhere_actions), actions_settings), + .actions = std::make_shared(std::move(*ActionsDAG::clone(&*prewhere_info->prewhere_actions)), actions_settings), .filter_column_name = prewhere_info->prewhere_column_name, .remove_filter_column = prewhere_info->remove_prewhere_column, .need_filter = prewhere_info->need_filter, diff --git a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp index 98b35a3ca2c..15917d59c9f 100644 --- a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp +++ b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp @@ -349,7 +349,7 @@ public: MergeTreeData::DataPartPtr data_part_, Names columns_to_read_, bool apply_deleted_mask_, - ActionsDAGPtr filter_, + std::optional filter_, ContextPtr context_, LoggerPtr log_) : ISourceStep(DataStream{.header = storage_snapshot_->getSampleBlockForColumns(columns_to_read_)}) @@ -376,7 +376,7 @@ public: { const auto & primary_key = 
storage_snapshot->metadata->getPrimaryKey(); const Names & primary_key_column_names = primary_key.column_names; - KeyCondition key_condition(filter.get(), context, primary_key_column_names, primary_key.expression); + KeyCondition key_condition(&*filter, context, primary_key_column_names, primary_key.expression); LOG_DEBUG(log, "Key condition: {}", key_condition.toString()); if (!key_condition.alwaysFalse()) @@ -417,7 +417,7 @@ private: MergeTreeData::DataPartPtr data_part; Names columns_to_read; bool apply_deleted_mask; - ActionsDAGPtr filter; + std::optional filter; ContextPtr context; LoggerPtr log; }; @@ -430,7 +430,7 @@ void createReadFromPartStep( MergeTreeData::DataPartPtr data_part, Names columns_to_read, bool apply_deleted_mask, - ActionsDAGPtr filter, + std::optional filter, ContextPtr context, LoggerPtr log) { diff --git a/src/Storages/MergeTree/MergeTreeSequentialSource.h b/src/Storages/MergeTree/MergeTreeSequentialSource.h index e6f055f776c..1b05512b9a3 100644 --- a/src/Storages/MergeTree/MergeTreeSequentialSource.h +++ b/src/Storages/MergeTree/MergeTreeSequentialSource.h @@ -38,7 +38,7 @@ void createReadFromPartStep( MergeTreeData::DataPartPtr data_part, Names columns_to_read, bool apply_deleted_mask, - ActionsDAGPtr filter, + std::optional filter, ContextPtr context, LoggerPtr log); diff --git a/src/Storages/MergeTree/MergeTreeSplitPrewhereIntoReadSteps.cpp b/src/Storages/MergeTree/MergeTreeSplitPrewhereIntoReadSteps.cpp index 25596b42951..116edf5b9cb 100644 --- a/src/Storages/MergeTree/MergeTreeSplitPrewhereIntoReadSteps.cpp +++ b/src/Storages/MergeTree/MergeTreeSplitPrewhereIntoReadSteps.cpp @@ -349,7 +349,7 @@ bool tryBuildPrewhereSteps(PrewhereInfoPtr prewhere_info, const ExpressionAction PrewhereExprStep new_step { .type = PrewhereExprStep::Filter, - .actions = std::make_shared(std::move(step.actions), actions_settings), + .actions = std::make_shared(std::move(*step.actions), actions_settings), .filter_column_name = step.column_name, /// Don't remove if it's in the list of original outputs .remove_filter_column = diff --git a/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp b/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp index a9a5fddace4..8c389f00780 100644 --- a/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp +++ b/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp @@ -112,7 +112,7 @@ void MergeTreeWhereOptimizer::optimize(SelectQueryInfo & select_query_info, cons LOG_DEBUG(log, "MergeTreeWhereOptimizer: condition \"{}\" moved to PREWHERE", select.prewhere()->formatForLogging(log_queries_cut_to_length)); } -MergeTreeWhereOptimizer::FilterActionsOptimizeResult MergeTreeWhereOptimizer::optimize(const ActionsDAGPtr & filter_dag, +MergeTreeWhereOptimizer::FilterActionsOptimizeResult MergeTreeWhereOptimizer::optimize(const ActionsDAG & filter_dag, const std::string & filter_column_name, const ContextPtr & context, bool is_final) @@ -126,7 +126,7 @@ MergeTreeWhereOptimizer::FilterActionsOptimizeResult MergeTreeWhereOptimizer::op where_optimizer_context.use_statistics = context->getSettingsRef().allow_statistics_optimize; RPNBuilderTreeContext tree_context(context); - RPNBuilderTreeNode node(&filter_dag->findInOutputs(filter_column_name), tree_context); + RPNBuilderTreeNode node(&filter_dag.findInOutputs(filter_column_name), tree_context); auto optimize_result = optimizeImpl(node, where_optimizer_context); if (!optimize_result) diff --git a/src/Storages/MergeTree/MergeTreeWhereOptimizer.h b/src/Storages/MergeTree/MergeTreeWhereOptimizer.h index 
ba6b4660924..a3d035675c6 100644 --- a/src/Storages/MergeTree/MergeTreeWhereOptimizer.h +++ b/src/Storages/MergeTree/MergeTreeWhereOptimizer.h @@ -52,7 +52,7 @@ public: bool fully_moved_to_prewhere = false; }; - FilterActionsOptimizeResult optimize(const ActionsDAGPtr & filter_dag, + FilterActionsOptimizeResult optimize(const ActionsDAG & filter_dag, const std::string & filter_column_name, const ContextPtr & context, bool is_final); diff --git a/src/Storages/MergeTree/RPNBuilder.cpp b/src/Storages/MergeTree/RPNBuilder.cpp index 4a18d606bb7..915a0e84902 100644 --- a/src/Storages/MergeTree/RPNBuilder.cpp +++ b/src/Storages/MergeTree/RPNBuilder.cpp @@ -398,7 +398,7 @@ size_t RPNBuilderFunctionTreeNode::getArgumentsSize() const { const auto * adaptor = typeid_cast(dag_node->function_base.get()); const auto * index_hint = typeid_cast(adaptor->getFunction().get()); - return index_hint->getActions()->getOutputs().size(); + return index_hint->getActions().getOutputs().size(); } return dag_node->children.size(); @@ -426,7 +426,7 @@ RPNBuilderTreeNode RPNBuilderFunctionTreeNode::getArgumentAt(size_t index) const { const auto & adaptor = typeid_cast(*dag_node->function_base); const auto & index_hint = typeid_cast(*adaptor.getFunction()); - return RPNBuilderTreeNode(index_hint.getActions()->getOutputs()[index], tree_context); + return RPNBuilderTreeNode(index_hint.getActions().getOutputs()[index], tree_context); } return RPNBuilderTreeNode(dag_node->children[index], tree_context); diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp index 503f542f2bd..d114608d8f1 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp @@ -77,9 +77,9 @@ StorageObjectStorageSource::~StorageObjectStorageSource() create_reader_pool->wait(); } -void StorageObjectStorageSource::setKeyCondition(const ActionsDAGPtr & filter_actions_dag, ContextPtr context_) +void StorageObjectStorageSource::setKeyCondition(const std::optional & filter_actions_dag, ContextPtr context_) { - setKeyConditionImpl(filter_actions_dag.get(), context_, read_from_format_info.format_header); + setKeyConditionImpl(filter_actions_dag, context_, read_from_format_info.format_header); } std::string StorageObjectStorageSource::getUniqueStoragePathIdentifier( diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.h b/src/Storages/ObjectStorage/StorageObjectStorageSource.h index fd7c7aa7102..01ce980feaa 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.h @@ -45,7 +45,7 @@ public: String getName() const override { return name; } - void setKeyCondition(const ActionsDAGPtr & filter_actions_dag, ContextPtr context_) override; + void setKeyCondition(const std::optional & filter_actions_dag, ContextPtr context_) override; Chunk generate() override; diff --git a/src/Storages/ReadFinalForExternalReplicaStorage.cpp b/src/Storages/ReadFinalForExternalReplicaStorage.cpp index e1d52eefc20..393d3f3fbb9 100644 --- a/src/Storages/ReadFinalForExternalReplicaStorage.cpp +++ b/src/Storages/ReadFinalForExternalReplicaStorage.cpp @@ -79,7 +79,7 @@ void readFinalFromNestedStorage( auto step = std::make_unique( query_plan.getCurrentDataStream(), - actions, + std::move(actions), filter_column_name, false); diff --git a/src/Storages/SelectQueryInfo.h b/src/Storages/SelectQueryInfo.h index 5f48d5e795e..5276870c037 100644 --- 
a/src/Storages/SelectQueryInfo.h +++ b/src/Storages/SelectQueryInfo.h @@ -46,9 +46,9 @@ struct PrewhereInfo { /// Actions for row level security filter. Applied separately before prewhere_actions. /// This actions are separate because prewhere condition should not be executed over filtered rows. - ActionsDAGPtr row_level_filter; + std::optional row_level_filter; /// Actions which are executed on block in order to get filter column for prewhere step. - ActionsDAGPtr prewhere_actions; + std::optional prewhere_actions; String row_level_column_name; String prewhere_column_name; bool remove_prewhere_column = false; @@ -56,7 +56,7 @@ struct PrewhereInfo bool generated_by_optimizer = false; PrewhereInfo() = default; - explicit PrewhereInfo(ActionsDAGPtr prewhere_actions_, String prewhere_column_name_) + explicit PrewhereInfo(std::optional prewhere_actions_, String prewhere_column_name_) : prewhere_actions(std::move(prewhere_actions_)), prewhere_column_name(std::move(prewhere_column_name_)) {} std::string dump() const; @@ -66,10 +66,10 @@ struct PrewhereInfo PrewhereInfoPtr prewhere_info = std::make_shared(); if (row_level_filter) - prewhere_info->row_level_filter = ActionsDAG::clone(row_level_filter); + prewhere_info->row_level_filter = std::move(*ActionsDAG::clone(&*row_level_filter)); if (prewhere_actions) - prewhere_info->prewhere_actions = ActionsDAG::clone(prewhere_actions); + prewhere_info->prewhere_actions = std::move(*ActionsDAG::clone(&*prewhere_actions)); prewhere_info->row_level_column_name = row_level_column_name; prewhere_info->prewhere_column_name = prewhere_column_name; @@ -93,7 +93,7 @@ struct FilterInfo /// Same as FilterInfo, but with ActionsDAG. struct FilterDAGInfo { - ActionsDAGPtr actions; + std::optional actions; String column_name; bool do_remove_column = false; diff --git a/src/Storages/StorageBuffer.cpp b/src/Storages/StorageBuffer.cpp index 695b31d0c80..fdddd84ab59 100644 --- a/src/Storages/StorageBuffer.cpp +++ b/src/Storages/StorageBuffer.cpp @@ -312,7 +312,7 @@ void StorageBuffer::read( if (src_table_query_info.prewhere_info->row_level_filter) { src_table_query_info.prewhere_info->row_level_filter = ActionsDAG::merge( - std::move(*ActionsDAG::clone(actions_dag)), + std::move(*ActionsDAG::clone(&actions_dag)), std::move(*src_table_query_info.prewhere_info->row_level_filter)); src_table_query_info.prewhere_info->row_level_filter->removeUnusedActions(); @@ -321,7 +321,7 @@ void StorageBuffer::read( if (src_table_query_info.prewhere_info->prewhere_actions) { src_table_query_info.prewhere_info->prewhere_actions = ActionsDAG::merge( - std::move(*ActionsDAG::clone(actions_dag)), + std::move(*ActionsDAG::clone(&actions_dag)), std::move(*src_table_query_info.prewhere_info->prewhere_actions)); src_table_query_info.prewhere_info->prewhere_actions->removeUnusedActions(); @@ -353,7 +353,7 @@ void StorageBuffer::read( header.getColumnsWithTypeAndName(), ActionsDAG::MatchColumnsMode::Name); - auto converting = std::make_unique(query_plan.getCurrentDataStream(), actions_dag); + auto converting = std::make_unique(query_plan.getCurrentDataStream(), std::move(actions_dag)); converting->setStepDescription("Convert destination table columns to Buffer table structure"); query_plan.addStep(std::move(converting)); @@ -432,7 +432,7 @@ void StorageBuffer::read( { return std::make_shared( header, - std::make_shared(ActionsDAG::clone(query_info.prewhere_info->row_level_filter), actions_settings), + std::make_shared(std::move(*ActionsDAG::clone(&*query_info.prewhere_info->row_level_filter)), 
actions_settings), query_info.prewhere_info->row_level_column_name, false); }); @@ -442,7 +442,7 @@ void StorageBuffer::read( { return std::make_shared( header, - std::make_shared(ActionsDAG::clone(query_info.prewhere_info->prewhere_actions), actions_settings), + std::make_shared(std::move(*ActionsDAG::clone(&*query_info.prewhere_info->prewhere_actions)), actions_settings), query_info.prewhere_info->prewhere_column_name, query_info.prewhere_info->remove_prewhere_column); }); @@ -472,7 +472,7 @@ void StorageBuffer::read( result_header.getColumnsWithTypeAndName(), ActionsDAG::MatchColumnsMode::Name); - auto converting = std::make_unique(query_plan.getCurrentDataStream(), convert_actions_dag); + auto converting = std::make_unique(query_plan.getCurrentDataStream(), std::move(convert_actions_dag)); query_plan.addStep(std::move(converting)); } diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index 3d91da240cc..6f8a9189941 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -1074,7 +1074,7 @@ std::optional StorageDistributed::distributedWriteBetweenDistribu return pipeline; } -static ActionsDAGPtr getFilterFromQuery(const ASTPtr & ast, ContextPtr context) +static std::optional getFilterFromQuery(const ASTPtr & ast, ContextPtr context) { QueryPlan plan; SelectQueryOptions options; @@ -1118,7 +1118,7 @@ static ActionsDAGPtr getFilterFromQuery(const ASTPtr & ast, ContextPtr context) } if (!source) - return nullptr; + return {}; return source->detachFilterActionsDAG(); } diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 2422bcd700b..4611371a471 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -1233,9 +1233,9 @@ StorageFileSource::~StorageFileSource() beforeDestroy(); } -void StorageFileSource::setKeyCondition(const ActionsDAGPtr & filter_actions_dag, ContextPtr context_) +void StorageFileSource::setKeyCondition(const std::optional & filter_actions_dag, ContextPtr context_) { - setKeyConditionImpl(filter_actions_dag.get(), context_, block_for_format); + setKeyConditionImpl(filter_actions_dag, context_, block_for_format); } diff --git a/src/Storages/StorageFile.h b/src/Storages/StorageFile.h index ac094aeb489..e9424527997 100644 --- a/src/Storages/StorageFile.h +++ b/src/Storages/StorageFile.h @@ -265,7 +265,7 @@ private: return storage->getName(); } - void setKeyCondition(const ActionsDAGPtr & filter_actions_dag, ContextPtr context_) override; + void setKeyCondition(const std::optional & filter_actions_dag, ContextPtr context_) override; bool tryGetCountFromCache(const struct stat & file_stat); diff --git a/src/Storages/StorageMaterializedView.cpp b/src/Storages/StorageMaterializedView.cpp index 316f398b476..8b6a9a4d4bb 100644 --- a/src/Storages/StorageMaterializedView.cpp +++ b/src/Storages/StorageMaterializedView.cpp @@ -273,8 +273,8 @@ void StorageMaterializedView::read( * They may be added in case of distributed query with JOIN. * In that case underlying table returns joined columns as well. 
*/ - converting_actions->removeUnusedActions(); - auto converting_step = std::make_unique(query_plan.getCurrentDataStream(), converting_actions); + converting_actions.removeUnusedActions(); + auto converting_step = std::make_unique(query_plan.getCurrentDataStream(), std::move(converting_actions)); converting_step->setStepDescription("Convert target table structure to MaterializedView structure"); query_plan.addStep(std::move(converting_step)); } diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index c3fdad3a8f2..374abd0b0a5 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -407,7 +407,7 @@ void ReadFromMerge::addFilter(FilterDAGInfo filter) { output_stream->header = FilterTransform::transformHeader( output_stream->header, - filter.actions.get(), + filter.actions ? &*filter.actions : nullptr, filter.column_name, filter.do_remove_column); pushed_down_filters.push_back(std::move(filter)); @@ -628,7 +628,7 @@ std::vector ReadFromMerge::createChildrenPlans(SelectQ auto alias_actions = ExpressionAnalyzer(required_columns_expr_list, syntax_result, context).getActionsDAG(true); - column_names_as_aliases = alias_actions->getRequiredColumns().getNames(); + column_names_as_aliases = alias_actions.getRequiredColumns().getNames(); if (column_names_as_aliases.empty()) column_names_as_aliases.push_back(ExpressionActions::getSmallestColumn(storage_metadata_snapshot->getColumns().getAllPhysical()).name); } @@ -662,7 +662,7 @@ std::vector ReadFromMerge::createChildrenPlans(SelectQ { auto filter_step = std::make_unique( child.plan.getCurrentDataStream(), - ActionsDAG::clone(filter_info.actions), + std::move(*ActionsDAG::clone(&*filter_info.actions)), filter_info.column_name, filter_info.do_remove_column); @@ -1060,7 +1060,7 @@ void ReadFromMerge::addVirtualColumns( column.column = column.type->createColumnConst(0, Field(database_name)); auto adding_column_dag = ActionsDAG::makeAddingColumnActions(std::move(column)); - auto expression_step = std::make_unique(child.plan.getCurrentDataStream(), adding_column_dag); + auto expression_step = std::make_unique(child.plan.getCurrentDataStream(), std::move(adding_column_dag)); child.plan.addStep(std::move(expression_step)); plan_header = child.plan.getCurrentDataStream().header; } @@ -1074,7 +1074,7 @@ void ReadFromMerge::addVirtualColumns( column.column = column.type->createColumnConst(0, Field(table_name)); auto adding_column_dag = ActionsDAG::makeAddingColumnActions(std::move(column)); - auto expression_step = std::make_unique(child.plan.getCurrentDataStream(), adding_column_dag); + auto expression_step = std::make_unique(child.plan.getCurrentDataStream(), std::move(adding_column_dag)); child.plan.addStep(std::move(expression_step)); plan_header = child.plan.getCurrentDataStream().header; } @@ -1089,7 +1089,7 @@ void ReadFromMerge::addVirtualColumns( column.column = column.type->createColumnConst(0, Field(database_name)); auto adding_column_dag = ActionsDAG::makeAddingColumnActions(std::move(column)); - auto expression_step = std::make_unique(child.plan.getCurrentDataStream(), adding_column_dag); + auto expression_step = std::make_unique(child.plan.getCurrentDataStream(), std::move(adding_column_dag)); child.plan.addStep(std::move(expression_step)); plan_header = child.plan.getCurrentDataStream().header; } @@ -1102,7 +1102,7 @@ void ReadFromMerge::addVirtualColumns( column.column = column.type->createColumnConst(0, Field(table_name)); auto adding_column_dag = 
ActionsDAG::makeAddingColumnActions(std::move(column)); - auto expression_step = std::make_unique(child.plan.getCurrentDataStream(), adding_column_dag); + auto expression_step = std::make_unique(child.plan.getCurrentDataStream(), std::move(adding_column_dag)); child.plan.addStep(std::move(expression_step)); plan_header = child.plan.getCurrentDataStream().header; } @@ -1240,7 +1240,7 @@ ReadFromMerge::RowPolicyData::RowPolicyData(RowPolicyFilterPtr row_policy_filter auto expression_analyzer = ExpressionAnalyzer{expr, syntax_result, local_context}; actions_dag = expression_analyzer.getActionsDAG(false /* add_aliases */, false /* project_result */); - filter_actions = std::make_shared(ActionsDAG::clone(actions_dag), + filter_actions = std::make_shared(std::move(*ActionsDAG::clone(&actions_dag)), ExpressionActionsSettings::fromContext(local_context, CompileExpressions::yes)); const auto & required_columns = filter_actions->getRequiredColumnsWithTypes(); const auto & sample_block_columns = filter_actions->getSampleBlock().getNamesAndTypesList(); @@ -1278,12 +1278,12 @@ void ReadFromMerge::RowPolicyData::extendNames(Names & names) const void ReadFromMerge::RowPolicyData::addStorageFilter(SourceStepWithFilter * step) const { - step->addFilter(ActionsDAG::clone(actions_dag), filter_column_name); + step->addFilter(ActionsDAG::clone(&actions_dag), filter_column_name); } void ReadFromMerge::RowPolicyData::addFilterTransform(QueryPlan & plan) const { - auto filter_step = std::make_unique(plan.getCurrentDataStream(), ActionsDAG::clone(actions_dag), filter_column_name, true /* remove filter column */); + auto filter_step = std::make_unique(plan.getCurrentDataStream(), std::move(*ActionsDAG::clone(&actions_dag)), filter_column_name, true /* remove filter column */); plan.addStep(std::move(filter_step)); } @@ -1476,7 +1476,7 @@ void ReadFromMerge::convertAndFilterSourceStream( { pipe_columns.emplace_back(NameAndTypePair(alias.name, alias.type)); - auto actions_dag = std::make_unique(pipe_columns); + ActionsDAG actions_dag(pipe_columns); QueryTreeNodePtr query_tree = buildQueryTree(alias.expression, local_context); query_tree->setAlias(alias.name); @@ -1485,12 +1485,12 @@ void ReadFromMerge::convertAndFilterSourceStream( query_analysis_pass.run(query_tree, local_context); PlannerActionsVisitor actions_visitor(modified_query_info.planner_context, false /*use_column_identifier_as_action_node_name*/); - const auto & nodes = actions_visitor.visit(*actions_dag, query_tree); + const auto & nodes = actions_visitor.visit(actions_dag, query_tree); if (nodes.size() != 1) throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected to have 1 output but got {}", nodes.size()); - actions_dag->addOrReplaceInOutputs(actions_dag->addAlias(*nodes.front(), alias.name)); + actions_dag.addOrReplaceInOutputs(actions_dag.addAlias(*nodes.front(), alias.name)); auto expression_step = std::make_unique(child.plan.getCurrentDataStream(), std::move(actions_dag)); child.plan.addStep(std::move(expression_step)); } @@ -1506,7 +1506,7 @@ void ReadFromMerge::convertAndFilterSourceStream( auto dag = std::make_shared(pipe_columns); auto actions_dag = expression_analyzer.getActionsDAG(true, false); - auto expression_step = std::make_unique(child.plan.getCurrentDataStream(), actions_dag); + auto expression_step = std::make_unique(child.plan.getCurrentDataStream(), std::move(actions_dag)); child.plan.addStep(std::move(expression_step)); } } @@ -1524,7 +1524,7 @@ void ReadFromMerge::convertAndFilterSourceStream( header.getColumnsWithTypeAndName(), 
convert_actions_match_columns_mode); - auto expression_step = std::make_unique(child.plan.getCurrentDataStream(), convert_actions_dag); + auto expression_step = std::make_unique(child.plan.getCurrentDataStream(), std::move(convert_actions_dag)); child.plan.addStep(std::move(expression_step)); } diff --git a/src/Storages/StorageMerge.h b/src/Storages/StorageMerge.h index 94b34256d02..d6f2deca7fd 100644 --- a/src/Storages/StorageMerge.h +++ b/src/Storages/StorageMerge.h @@ -225,7 +225,7 @@ private: private: std::string filter_column_name; // complex filter, may contain logic operations - ActionsDAGPtr actions_dag; + ActionsDAG actions_dag; ExpressionActionsPtr filter_actions; StorageMetadataPtr storage_metadata_snapshot; }; diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 9352f772ce1..b1a8a81914c 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -245,7 +245,7 @@ std::optional StorageMergeTree::totalRows(const Settings &) const return getTotalActiveSizeInRows(); } -std::optional StorageMergeTree::totalRowsByPartitionPredicate(const ActionsDAGPtr & filter_actions_dag, ContextPtr local_context) const +std::optional StorageMergeTree::totalRowsByPartitionPredicate(const ActionsDAG & filter_actions_dag, ContextPtr local_context) const { auto parts = getVisibleDataPartsVector(local_context); return totalRowsByPartitionPredicateImpl(filter_actions_dag, local_context, parts); diff --git a/src/Storages/StorageMergeTree.h b/src/Storages/StorageMergeTree.h index 4d819508934..56324449b34 100644 --- a/src/Storages/StorageMergeTree.h +++ b/src/Storages/StorageMergeTree.h @@ -65,7 +65,7 @@ public: size_t num_streams) override; std::optional totalRows(const Settings &) const override; - std::optional totalRowsByPartitionPredicate(const ActionsDAGPtr & filter_actions_dag, ContextPtr) const override; + std::optional totalRowsByPartitionPredicate(const ActionsDAG & filter_actions_dag, ContextPtr) const override; std::optional totalBytes(const Settings &) const override; std::optional totalBytesUncompressed(const Settings &) const override; diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index db58d0081c6..b472710b6d8 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -5556,7 +5556,7 @@ std::optional StorageReplicatedMergeTree::totalRows(const Settings & set return res; } -std::optional StorageReplicatedMergeTree::totalRowsByPartitionPredicate(const ActionsDAGPtr & filter_actions_dag, ContextPtr local_context) const +std::optional StorageReplicatedMergeTree::totalRowsByPartitionPredicate(const ActionsDAG & filter_actions_dag, ContextPtr local_context) const { DataPartsVector parts; foreachActiveParts([&](auto & part) { parts.push_back(part); }, local_context->getSettingsRef().select_sequential_consistency); diff --git a/src/Storages/StorageReplicatedMergeTree.h b/src/Storages/StorageReplicatedMergeTree.h index f96206ce657..2e54f17d5d5 100644 --- a/src/Storages/StorageReplicatedMergeTree.h +++ b/src/Storages/StorageReplicatedMergeTree.h @@ -159,7 +159,7 @@ public: size_t num_streams) override; std::optional totalRows(const Settings & settings) const override; - std::optional totalRowsByPartitionPredicate(const ActionsDAGPtr & filter_actions_dag, ContextPtr context) const override; + std::optional totalRowsByPartitionPredicate(const ActionsDAG & filter_actions_dag, ContextPtr context) const override; std::optional 
totalBytes(const Settings & settings) const override; std::optional totalBytesUncompressed(const Settings & settings) const override; diff --git a/src/Storages/StorageTableFunction.h b/src/Storages/StorageTableFunction.h index 9507eb6ed8a..345dd62c687 100644 --- a/src/Storages/StorageTableFunction.h +++ b/src/Storages/StorageTableFunction.h @@ -112,7 +112,7 @@ public: auto step = std::make_unique( query_plan.getCurrentDataStream(), - convert_actions_dag); + std::move(convert_actions_dag)); step->setStepDescription("Converting columns"); query_plan.addStep(std::move(step)); diff --git a/src/Storages/StorageURL.h b/src/Storages/StorageURL.h index c336f597f41..ec1f803750e 100644 --- a/src/Storages/StorageURL.h +++ b/src/Storages/StorageURL.h @@ -185,9 +185,9 @@ public: String getName() const override { return name; } - void setKeyCondition(const ActionsDAGPtr & filter_actions_dag, ContextPtr context_) override + void setKeyCondition(const std::optional & filter_actions_dag, ContextPtr context_) override { - setKeyConditionImpl(filter_actions_dag.get(), context_, block_for_format); + setKeyConditionImpl(filter_actions_dag, context_, block_for_format); } Chunk generate() override; diff --git a/src/Storages/StorageValues.cpp b/src/Storages/StorageValues.cpp index 4d73f8e5c87..c1ca6244866 100644 --- a/src/Storages/StorageValues.cpp +++ b/src/Storages/StorageValues.cpp @@ -48,13 +48,13 @@ Pipe StorageValues::read( if (!prepared_pipe.empty()) { - auto dag = std::make_unique(prepared_pipe.getHeader().getColumnsWithTypeAndName()); + ActionsDAG dag(prepared_pipe.getHeader().getColumnsWithTypeAndName()); ActionsDAG::NodeRawConstPtrs outputs; outputs.reserve(column_names.size()); for (const auto & name : column_names) - outputs.push_back(dag->getOutputs()[prepared_pipe.getHeader().getPositionByName(name)]); + outputs.push_back(dag.getOutputs()[prepared_pipe.getHeader().getPositionByName(name)]); - dag->getOutputs().swap(outputs); + dag.getOutputs().swap(outputs); auto expression = std::make_shared(std::move(dag)); prepared_pipe.addSimpleTransform([&](const Block & header) diff --git a/src/Storages/StorageView.cpp b/src/Storages/StorageView.cpp index 2c0d5c5ca85..e2c4d67c8d1 100644 --- a/src/Storages/StorageView.cpp +++ b/src/Storages/StorageView.cpp @@ -177,8 +177,8 @@ void StorageView::read( /// It's expected that the columns read from storage are not constant. /// Because method 'getSampleBlockForColumns' is used to obtain a structure of result in InterpreterSelectQuery. 
- auto materializing_actions = std::make_unique(query_plan.getCurrentDataStream().header.getColumnsWithTypeAndName()); - materializing_actions->addMaterializingOutputActions(); + ActionsDAG materializing_actions(query_plan.getCurrentDataStream().header.getColumnsWithTypeAndName()); + materializing_actions.addMaterializingOutputActions(); auto materializing = std::make_unique(query_plan.getCurrentDataStream(), std::move(materializing_actions)); materializing->setStepDescription("Materialize constants after VIEW subquery"); @@ -203,7 +203,7 @@ void StorageView::read( expected_header.getColumnsWithTypeAndName(), ActionsDAG::MatchColumnsMode::Name); - auto converting = std::make_unique(query_plan.getCurrentDataStream(), convert_actions_dag); + auto converting = std::make_unique(query_plan.getCurrentDataStream(), std::move(convert_actions_dag)); converting->setStepDescription("Convert VIEW subquery result to VIEW table structure"); query_plan.addStep(std::move(converting)); } diff --git a/src/Storages/System/StorageSystemStackTrace.cpp b/src/Storages/System/StorageSystemStackTrace.cpp index ba7433fb9ae..f4e6fe3df5f 100644 --- a/src/Storages/System/StorageSystemStackTrace.cpp +++ b/src/Storages/System/StorageSystemStackTrace.cpp @@ -276,7 +276,7 @@ public: StackTraceSource( const Names & column_names, Block header_, - ActionsDAGPtr && filter_dag_, + std::optional filter_dag_, ContextPtr context_, UInt64 max_block_size_, LoggerPtr log_) @@ -422,7 +422,7 @@ protected: private: ContextPtr context; Block header; - const ActionsDAGPtr filter_dag; + const std::optional filter_dag; const ActionsDAG::Node * predicate; const size_t max_block_size; diff --git a/src/Storages/TTLDescription.cpp b/src/Storages/TTLDescription.cpp index 56f65b57367..a32eef20aed 100644 --- a/src/Storages/TTLDescription.cpp +++ b/src/Storages/TTLDescription.cpp @@ -176,12 +176,12 @@ static ExpressionAndSets buildExpressionAndSets(ASTPtr & ast, const NamesAndType ExpressionAnalyzer analyzer(ast, syntax_analyzer_result, context_copy); auto dag = analyzer.getActionsDAG(false); - const auto * col = &dag->findInOutputs(ast->getColumnName()); + const auto * col = &dag.findInOutputs(ast->getColumnName()); if (col->result_name != ttl_string) - col = &dag->addAlias(*col, ttl_string); + col = &dag.addAlias(*col, ttl_string); - dag->getOutputs() = {col}; - dag->removeUnusedActions(); + dag.getOutputs() = {col}; + dag.removeUnusedActions(); result.expression = std::make_shared(std::move(dag), ExpressionActionsSettings::fromContext(context_copy)); result.sets = analyzer.getPreparedSets(); diff --git a/src/Storages/VirtualColumnUtils.cpp b/src/Storages/VirtualColumnUtils.cpp index 1bd5e80a4f9..7f54c6a6ee3 100644 --- a/src/Storages/VirtualColumnUtils.cpp +++ b/src/Storages/VirtualColumnUtils.cpp @@ -80,7 +80,7 @@ void buildSetsForDAG(const ActionsDAGPtr & dag, const ContextPtr & context) void filterBlockWithDAG(const ActionsDAGPtr & dag, Block & block, ContextPtr context) { buildSetsForDAG(dag, context); - auto actions = std::make_shared(ActionsDAG::clone(dag)); + auto actions = std::make_shared(std::move(*ActionsDAG::clone(dag))); Block block_with_filter = block; actions->execute(block_with_filter, /*dry_run=*/ false, /*allow_duplicates_in_input=*/ true); @@ -318,9 +318,9 @@ static const ActionsDAG::Node * splitFilterNodeForAllowedInputs( { if (const auto * index_hint = typeid_cast(adaptor->getFunction().get())) { - auto index_hint_dag = ActionsDAG::clone(index_hint->getActions()); + auto index_hint_dag = 
std::move(*ActionsDAG::clone(&index_hint->getActions())); ActionsDAG::NodeRawConstPtrs atoms; - for (const auto & output : index_hint_dag->getOutputs()) + for (const auto & output : index_hint_dag.getOutputs()) if (const auto * child_copy = splitFilterNodeForAllowedInputs(output, allowed_inputs, additional_nodes)) atoms.push_back(child_copy); @@ -331,13 +331,13 @@ static const ActionsDAG::Node * splitFilterNodeForAllowedInputs( if (atoms.size() > 1) { FunctionOverloadResolverPtr func_builder_and = std::make_unique(std::make_shared()); - res = &index_hint_dag->addFunction(func_builder_and, atoms, {}); + res = &index_hint_dag.addFunction(func_builder_and, atoms, {}); } if (!res->result_type->equals(*node->result_type)) - res = &index_hint_dag->addCast(*res, node->result_type, {}); + res = &index_hint_dag.addCast(*res, node->result_type, {}); - additional_nodes.splice(additional_nodes.end(), ActionsDAG::detachNodes(std::move(*index_hint_dag))); + additional_nodes.splice(additional_nodes.end(), ActionsDAG::detachNodes(std::move(index_hint_dag))); return res; } } diff --git a/src/Storages/WindowView/StorageWindowView.cpp b/src/Storages/WindowView/StorageWindowView.cpp index 8f39f0da5af..30ae1f95593 100644 --- a/src/Storages/WindowView/StorageWindowView.cpp +++ b/src/Storages/WindowView/StorageWindowView.cpp @@ -1130,7 +1130,7 @@ void StorageWindowView::read( { auto converting_actions = ActionsDAG::makeConvertingActions( target_header.getColumnsWithTypeAndName(), wv_header.getColumnsWithTypeAndName(), ActionsDAG::MatchColumnsMode::Name); - auto converting_step = std::make_unique(query_plan.getCurrentDataStream(), converting_actions); + auto converting_step = std::make_unique(query_plan.getCurrentDataStream(), std::move(converting_actions)); converting_step->setStepDescription("Convert Target table structure to WindowView structure"); query_plan.addStep(std::move(converting_step)); } From 1237f93182db21f00df9ca7913619ee63d75850b Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 8 Jul 2024 15:06:52 +0000 Subject: [PATCH 025/661] Fixing some crashes. 
--- src/Interpreters/ExpressionActions.cpp | 2 +- src/Planner/Planner.cpp | 8 +++++--- src/Planner/PlannerJoins.cpp | 8 ++++---- src/Planner/PlannerJoins.h | 4 ++-- src/Processors/QueryPlan/ReadFromMergeTree.cpp | 2 +- 5 files changed, 13 insertions(+), 11 deletions(-) diff --git a/src/Interpreters/ExpressionActions.cpp b/src/Interpreters/ExpressionActions.cpp index 399f4f2ff4f..1c6c3f2556b 100644 --- a/src/Interpreters/ExpressionActions.cpp +++ b/src/Interpreters/ExpressionActions.cpp @@ -59,7 +59,7 @@ ExpressionActions::ExpressionActions(ActionsDAG actions_dag_, const ExpressionAc #if USE_EMBEDDED_COMPILER if (settings.can_compile_expressions && settings.compile_expressions == CompileExpressions::yes) - actions_dag->compileExpressions(settings.min_count_to_compile_expression, lazy_executed_nodes); + actions_dag.compileExpressions(settings.min_count_to_compile_expression, lazy_executed_nodes); #endif linearizeActions(lazy_executed_nodes); diff --git a/src/Planner/Planner.cpp b/src/Planner/Planner.cpp index 48e42099ce8..0b10cef82ce 100644 --- a/src/Planner/Planner.cpp +++ b/src/Planner/Planner.cpp @@ -212,9 +212,11 @@ FiltersForTableExpressionMap collectFiltersForAnalysis(const QueryTreeNodePtr & if (!read_from_dummy) continue; - auto filter_actions = read_from_dummy->detachFilterActionsDAG(); - const auto & table_node = dummy_storage_to_table.at(&read_from_dummy->getStorage()); - res[table_node] = FiltersForTableExpression{std::move(filter_actions), read_from_dummy->getPrewhereInfo()}; + if (auto filter_actions = read_from_dummy->detachFilterActionsDAG()) + { + const auto & table_node = dummy_storage_to_table.at(&read_from_dummy->getStorage()); + res[table_node] = FiltersForTableExpression{std::move(filter_actions), read_from_dummy->getPrewhereInfo()}; + } } return res; diff --git a/src/Planner/PlannerJoins.cpp b/src/Planner/PlannerJoins.cpp index db9678d91a6..7772336f7c0 100644 --- a/src/Planner/PlannerJoins.cpp +++ b/src/Planner/PlannerJoins.cpp @@ -588,11 +588,11 @@ JoinClausesAndActions buildJoinClausesAndActions( } } - result.left_join_expressions_actions = std::move(left_join_actions); - //result.left_join_tmp_expression_actions = std::move(left_join_actions); + result.left_join_expressions_actions = std::move(*ActionsDAG::clone(&left_join_actions)); + result.left_join_tmp_expression_actions = std::move(left_join_actions); result.left_join_expressions_actions.removeUnusedActions(join_left_actions_names); - result.right_join_expressions_actions = std::move(right_join_actions); - //result.right_join_tmp_expression_actions = std::move(right_join_actions); + result.right_join_expressions_actions = std::move(*ActionsDAG::clone(&right_join_actions)); + result.right_join_tmp_expression_actions = std::move(right_join_actions); result.right_join_expressions_actions.removeUnusedActions(join_right_actions_names); if (is_inequal_join) diff --git a/src/Planner/PlannerJoins.h b/src/Planner/PlannerJoins.h index 3735c373acc..d8665ab7739 100644 --- a/src/Planner/PlannerJoins.h +++ b/src/Planner/PlannerJoins.h @@ -182,8 +182,8 @@ struct JoinClausesAndActions /// Join clauses. Actions dag nodes point into join_expression_actions. 
JoinClauses join_clauses; /// Whole JOIN ON section expressions - // ActionsDAGPtr left_join_tmp_expression_actions; - // ActionsDAGPtr right_join_tmp_expression_actions; + ActionsDAG left_join_tmp_expression_actions; + ActionsDAG right_join_tmp_expression_actions; /// Left join expressions actions ActionsDAG left_join_expressions_actions; /// Right join expressions actions diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index bc878e7ee49..9ca79fde26f 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -1520,7 +1520,7 @@ void ReadFromMergeTree::applyFilters(ActionDAGNodes added_filter_nodes) /// TODO: Get rid of filter_actions_dag in query_info after we move analysis of /// parallel replicas and unused shards into optimization, similar to projection analysis. if (filter_actions_dag) - query_info.filter_actions_dag = std::make_shared(std::move(*filter_actions_dag)); + query_info.filter_actions_dag = std::make_shared(std::move(*ActionsDAG::clone(&*filter_actions_dag))); buildIndexes( indexes, From 849fb83c9770fedb937dc59df73c0cc172e115bf Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Tue, 9 Jul 2024 17:37:17 +0800 Subject: [PATCH 026/661] add function printf --- .../functions/string-replace-functions.md | 25 ++ src/Functions/printf.cpp | 308 ++++++++++++++++++ .../0_stateless/032010_printf.reference | 16 + tests/queries/0_stateless/032010_printf.sql | 24 ++ 4 files changed, 373 insertions(+) create mode 100644 src/Functions/printf.cpp create mode 100644 tests/queries/0_stateless/032010_printf.reference create mode 100644 tests/queries/0_stateless/032010_printf.sql diff --git a/docs/en/sql-reference/functions/string-replace-functions.md b/docs/en/sql-reference/functions/string-replace-functions.md index 8793ebdd1a3..177790c983e 100644 --- a/docs/en/sql-reference/functions/string-replace-functions.md +++ b/docs/en/sql-reference/functions/string-replace-functions.md @@ -223,3 +223,28 @@ SELECT translateUTF8('Münchener Straße', 'üß', 'us') AS res; │ Munchener Strase │ └──────────────────┘ ``` + +## printf + +The `printf` function formats the given string with the values (strings, integers, floating-points etc.) listed in the arguments, similar to printf function in C++. The format string can contain format specifiers starting with `%` character. Anything not contained in `%` and the following format specifier is considered literal text and copied verbatim into the output. Literal `%` character can be escaped by `%%`. + +**Syntax** + +``` sql +printf(format, arg1, arg2, ...) 
+``` + +**Example** + +Query: + +``` sql +select printf('%%%s %s %d', 'Hello', 'World', 2024); +``` + + +``` response +┌─printf('%%%s %s %d', 'Hello', 'World', 2024)─┐ +│ %Hello World 2024 │ +└──────────────────────────────────────────────┘ +``` diff --git a/src/Functions/printf.cpp b/src/Functions/printf.cpp new file mode 100644 index 00000000000..cb21d5e39ad --- /dev/null +++ b/src/Functions/printf.cpp @@ -0,0 +1,308 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +namespace DB +{ +namespace ErrorCodes +{ + extern const int ILLEGAL_COLUMN; + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int ILLEGAL_TYPE_OF_ARGUMENT; +} + +namespace +{ + +class FunctionPrintf : public IFunction +{ +private: + ContextPtr context; + FunctionOverloadResolverPtr function_concat; + + struct Instruction + { + std::string_view format; + size_t rows; + bool is_literal; /// format is literal string without any argument + ColumnWithTypeAndName input; /// Only used when is_literal is false + + ColumnWithTypeAndName execute() + { + if (is_literal) + return executeLiteral(format); + else if (isColumnConst(*input.column)) + return executeConstant(input); + else + return executeNonconstant(input); + } + + String toString() const + { + std::ostringstream oss; + oss << "format:" << format << ", rows:" << rows << ", is_literal:" << is_literal << ", input:" << input.dumpStructure() + << std::endl; + return oss.str(); + } + + private: + ColumnWithTypeAndName executeLiteral(std::string_view literal) + { + ColumnWithTypeAndName res; + auto str_col = ColumnString::create(); + str_col->insert(fmt::sprintf(literal)); + res.column = ColumnConst::create(std::move(str_col), rows); + res.type = std::make_shared(); + return res; + } + + ColumnWithTypeAndName executeConstant(const ColumnWithTypeAndName & arg) + { + ColumnWithTypeAndName tmp_arg = arg; + const auto & const_col = static_cast(*arg.column); + tmp_arg.column = const_col.getDataColumnPtr(); + + ColumnWithTypeAndName tmp_res = executeNonconstant(tmp_arg); + return ColumnWithTypeAndName{ColumnConst::create(tmp_res.column, arg.column->size()), tmp_res.type, tmp_res.name}; + } + + ColumnWithTypeAndName executeNonconstant(const ColumnWithTypeAndName & arg) + { + size_t size = arg.column->size(); + auto res_col = ColumnString::create(); + auto & res_str = static_cast(*res_col); + auto & res_offsets = res_str.getOffsets(); + auto & res_chars = res_str.getChars(); + res_offsets.reserve_exact(size); + res_chars.reserve(format.size() * size * 2); + + String s; + WhichDataType which(arg.type); + +#define EXECUTE_BY_TYPE(IS_TYPE, GET_TYPE) \ + else if (which.IS_TYPE()) \ + { \ + for (size_t i = 0; i < size; ++i) \ + { \ + auto a = arg.column->GET_TYPE(i); \ + s = fmt::sprintf(format, a); \ + res_str.insertData(s.data(), s.size()); \ + } \ + } + + if (false) + ; + EXECUTE_BY_TYPE(isNativeInt, getInt) + EXECUTE_BY_TYPE(isNativeUInt, getUInt) + EXECUTE_BY_TYPE(isFloat32, getFloat32) + EXECUTE_BY_TYPE(isFloat64, getFloat64) + else if (which.isStringOrFixedString()) + { + for (size_t i = 0; i < size; ++i) + { + auto a = arg.column->getDataAt(i).toView(); + s = fmt::sprintf(format, a); + res_str.insertData(s.data(), s.size()); + } + } + else throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "The argument type of function {} is {}, but native numeric or string type is expected", + FunctionPrintf::name, + arg.type->getName()); +#undef EXECUTE_BY_TYPE + + 
ColumnWithTypeAndName res; + res.name = arg.name; + res.type = std::make_shared(); + res.column = std::move(res_col); + return res; + } + }; + +public: + static constexpr auto name = "printf"; + + static FunctionPtr create(ContextPtr context) { return std::make_shared(context); } + + explicit FunctionPrintf(ContextPtr context_) + : context(context_), function_concat(FunctionFactory::instance().get("concat", context)) { } + + String getName() const override { return name; } + + bool isVariadic() const override { return true; } + + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } + + size_t getNumberOfArguments() const override { return 0; } + + bool useDefaultImplementationForConstants() const override { return false; } + ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {0}; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + if (arguments.empty()) + throw Exception( + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Number of arguments for function {} doesn't match: passed {}, should be at least 1", + getName(), + arguments.size()); + + /// First pattern argument must have string type + if (!isString(arguments[0])) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "The first argument type of function {} is {}, but String type is expected", + getName(), + arguments[0]->getName()); + + for (size_t i = 1; i < arguments.size(); ++i) + { + if (!isNativeNumber(arguments[i]) && !isStringOrFixedString(arguments[i])) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "The {}-th argument type of function {} is {}, but native numeric or string type is expected", + i + 1, + getName(), + arguments[i]->getName()); + } + return std::make_shared(); + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + { + const ColumnPtr & c0 = arguments[0].column; + const ColumnConst * c0_const_string = typeid_cast(&*c0); + if (!c0_const_string) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "First argument of function {} must be constant string", getName()); + + String format = c0_const_string->getValue(); + auto instructions = buildInstructions(format, arguments, input_rows_count); + + ColumnsWithTypeAndName concat_args(instructions.size()); + for (size_t i = 0; i < instructions.size(); ++i) + { + std::cout << "instruction[" << i << "]:" << instructions[i].toString() << std::endl; + concat_args[i] = instructions[i].execute(); + std::cout << "concat_args[" << i << "]:" << concat_args[i].dumpStructure() << std::endl; + } + + auto res = function_concat->build(concat_args)->execute(concat_args, std::make_shared(), input_rows_count); + return res; + } + +private: + std::vector buildInstructions(const String & format , const ColumnsWithTypeAndName & arguments, size_t input_rows_count) const + { + std::vector instructions; + instructions.reserve(arguments.size()); + + auto append_instruction = [&](const char * begin, const char * end, const ColumnWithTypeAndName & arg) + { + Instruction instr; + instr.rows = input_rows_count; + instr.format = std::string_view(begin, end - begin); + + size_t size = end - begin; + if (size > 1 && begin[0] == '%' and begin[1] != '%') + { + instr.is_literal = false; + instr.input = arg; + } + else + { + instr.is_literal = true; + } + instructions.emplace_back(std::move(instr)); + }; + + auto check_index_range = [&](size_t idx) + { + if (idx >= arguments.size()) + 
throw Exception( + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Number of arguments for function {} doesn't match: passed {}, but format is {}", + getName(), + arguments.size(), + format); + }; + + const char * begin = format.data(); + const char * end = format.data() + format.size(); + const char * curr = begin; + size_t idx = 0; + while (curr < end) + { + const char * tmp = curr; + bool is_first = curr == begin; /// If current instruction is the first one + bool is_literal = false; /// If current instruction is literal string without any argument + if (is_first) + { + if (*curr != '%') + is_literal = true; + else if (curr + 1 < end && *(curr + 1) == '%') + is_literal = true; + else + ++idx; /// Skip first argument if first instruction is not literal + } + + if (!is_literal) + ++curr; + + while (curr < end) + { + if (*curr != '%') + ++curr; + else if (curr + 1 < end && *(curr + 1) == '%') + curr += 2; + else + { + check_index_range(idx); + append_instruction(tmp, curr, arguments[idx]); + ++idx; + break; + } + } + + if (curr == end) + { + check_index_range(idx); + append_instruction(tmp, curr, arguments[idx]); + ++idx; + } + } + + /// Check if all arguments are used + if (idx != arguments.size()) + throw Exception( + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Number of arguments for function {} doesn't match: passed {}, but format is {}", + getName(), + arguments.size(), + format); + + return instructions; + } +}; + +} + +REGISTER_FUNCTION(Printf) +{ + factory.registerFunction(); +} + +} diff --git a/tests/queries/0_stateless/032010_printf.reference b/tests/queries/0_stateless/032010_printf.reference new file mode 100644 index 00000000000..58501cbd0fc --- /dev/null +++ b/tests/queries/0_stateless/032010_printf.reference @@ -0,0 +1,16 @@ +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 diff --git a/tests/queries/0_stateless/032010_printf.sql b/tests/queries/0_stateless/032010_printf.sql new file mode 100644 index 00000000000..58fe081e499 --- /dev/null +++ b/tests/queries/0_stateless/032010_printf.sql @@ -0,0 +1,24 @@ +-- Testing integer formats +select printf('%%d: %d', 123) = '%d: 123'; +select printf('%%i: %i', 123) = '%i: 123'; +select printf('%%u: %u', 123) = '%u: 123'; +select printf('%%o: %o', 123) = '%o: 173'; +select printf('%%x: %x', 123) = '%x: 7b'; +select printf('%%X: %X', 123) = '%X: 7B'; + +-- Testing floating point formats +select printf('%%f: %f', 123.456) = '%f: 123.456000'; +select printf('%%F: %F', 123.456) = '%F: 123.456000'; +select printf('%%e: %e', 123.456) = '%e: 1.234560e+02'; +select printf('%%E: %E', 123.456) = '%E: 1.234560E+02'; +select printf('%%g: %g', 123.456) = '%g: 123.456'; +select printf('%%G: %G', 123.456) = '%G: 123.456'; +select printf('%%a: %a', 123.456) = '%a: 0x1.edd2f1a9fbe77p+6'; +select printf('%%A: %A', 123.456) = '%A: 0X1.EDD2F1A9FBE77P+6'; + +-- Testing character formats +select printf('%%s: %s', 'abc') = '%s: abc'; + + +-- Testing the %% specifier +select printf('%%%%: %%') = '%%: %'; \ No newline at end of file From e0e3842772ead940f53346cc087ea54e5e6aa8fa Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Wed, 10 Jul 2024 10:15:33 +0800 Subject: [PATCH 027/661] support printf --- src/Functions/printf.cpp | 6 +++--- utils/check-style/aspell-ignore/en/aspell-dict.txt | 1 + 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/Functions/printf.cpp b/src/Functions/printf.cpp index cb21d5e39ad..b2a0143a3f8 100644 --- a/src/Functions/printf.cpp +++ b/src/Functions/printf.cpp @@ -52,7 +52,7 @@ private: String 
toString() const { - std::ostringstream oss; + std::ostringstream oss; // STYLE_CHECK_ALLOW_STD_STRING_STREAM oss << "format:" << format << ", rows:" << rows << ", is_literal:" << is_literal << ", input:" << input.dumpStructure() << std::endl; return oss.str(); @@ -195,9 +195,9 @@ public: ColumnsWithTypeAndName concat_args(instructions.size()); for (size_t i = 0; i < instructions.size(); ++i) { - std::cout << "instruction[" << i << "]:" << instructions[i].toString() << std::endl; + // std::cout << "instruction[" << i << "]:" << instructions[i].toString() << std::endl; concat_args[i] = instructions[i].execute(); - std::cout << "concat_args[" << i << "]:" << concat_args[i].dumpStructure() << std::endl; + // std::cout << "concat_args[" << i << "]:" << concat_args[i].dumpStructure() << std::endl; } auto res = function_concat->build(concat_args)->execute(concat_args, std::make_shared(), input_rows_count); diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index 943caf918d6..21a9b759466 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -2264,6 +2264,7 @@ prettyspacemonoblock prettyspacenoescapes prettyspacenoescapesmonoblock prewhere +printf privateKeyFile privateKeyPassphraseHandler prlimit From 8b1bc00e9a6462a6dce2946510d83a85bc69a139 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Wed, 10 Jul 2024 10:52:53 +0800 Subject: [PATCH 028/661] fix style --- src/Functions/printf.cpp | 33 +++++++++++++++++---------------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/src/Functions/printf.cpp b/src/Functions/printf.cpp index b2a0143a3f8..c7c6bd228a7 100644 --- a/src/Functions/printf.cpp +++ b/src/Functions/printf.cpp @@ -9,19 +9,19 @@ #include #include +#include #include #include #include #include -#include namespace DB { namespace ErrorCodes { - extern const int ILLEGAL_COLUMN; - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; - extern const int ILLEGAL_TYPE_OF_ARGUMENT; +extern const int ILLEGAL_COLUMN; +extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; +extern const int ILLEGAL_TYPE_OF_ARGUMENT; } namespace @@ -54,7 +54,7 @@ private: { std::ostringstream oss; // STYLE_CHECK_ALLOW_STD_STRING_STREAM oss << "format:" << format << ", rows:" << rows << ", is_literal:" << is_literal << ", input:" << input.dumpStructure() - << std::endl; + << std::endl; return oss.str(); } @@ -93,15 +93,15 @@ private: WhichDataType which(arg.type); #define EXECUTE_BY_TYPE(IS_TYPE, GET_TYPE) \ - else if (which.IS_TYPE()) \ - { \ - for (size_t i = 0; i < size; ++i) \ - { \ - auto a = arg.column->GET_TYPE(i); \ - s = fmt::sprintf(format, a); \ - res_str.insertData(s.data(), s.size()); \ - } \ - } + else if (which.IS_TYPE()) \ + { \ + for (size_t i = 0; i < size; ++i) \ + { \ + auto a = arg.column->GET_TYPE(i); \ + s = fmt::sprintf(format, a); \ + res_str.insertData(s.data(), s.size()); \ + } \ + } if (false) ; @@ -205,7 +205,8 @@ public: } private: - std::vector buildInstructions(const String & format , const ColumnsWithTypeAndName & arguments, size_t input_rows_count) const + std::vector + buildInstructions(const String & format, const ColumnsWithTypeAndName & arguments, size_t input_rows_count) const { std::vector instructions; instructions.reserve(arguments.size()); @@ -248,7 +249,7 @@ private: { const char * tmp = curr; bool is_first = curr == begin; /// If current instruction is the first one - bool is_literal = false; /// If current 
instruction is literal string without any argument + bool is_literal = false; /// If current instruction is literal string without any argument if (is_first) { if (*curr != '%') From 11a8de50a6283277c585fa2bad74aad1712fb1f2 Mon Sep 17 00:00:00 2001 From: vdimir Date: Wed, 10 Jul 2024 08:56:35 +0000 Subject: [PATCH 029/661] Revert "Disable broken cases from 02911_join_on_nullsafe_optimization" This reverts commit 513ce9fa2f3bb0d2cc1774a07272a249b40f475f. --- ...2911_join_on_nullsafe_optimization.reference | 17 +++++++++++++---- .../02911_join_on_nullsafe_optimization.sql | 5 ++--- 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.reference b/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.reference index 4eb7e74446d..f0463509b80 100644 --- a/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.reference +++ b/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.reference @@ -36,10 +36,19 @@ SELECT * FROM t1 JOIN t2 ON (t1.x == t2.x AND ((t2.x IS NOT NULL) AND (t1.x IS N 3 3 3 33 \N \N \N \N -- aliases defined in the join condition are valid --- FIXME(@vdimir) broken query formatting for the following queries: --- SELECT *, e, e2 FROM t1 FULL JOIN t2 ON ( ( ((t1.x == t2.x) AS e) AND ((t2.x IS NOT NULL) AND (t1.x IS NOT NULL)) ) OR ( (t2.x IS NULL) AND (t1.x IS NULL) ) AS e2 ) ORDER BY t1.x NULLS LAST, t2.x NULLS LAST; --- SELECT *, e, e2 FROM t1 FULL JOIN t2 ON ( ( ((t1.x == t2.x) AS e) AND ((t2.x IS NOT NULL) AND (t1.x IS NOT NULL)) ) AS e2 ) ORDER BY t1.x NULLS LAST, t2.x NULLS LAST; - +SELECT *, e, e2 FROM t1 FULL JOIN t2 ON ( ( ((t1.x == t2.x) AS e) AND ((t2.x IS NOT NULL) AND (t1.x IS NOT NULL)) ) OR ( (t2.x IS NULL) AND (t1.x IS NULL) ) AS e2 ) ORDER BY t1.x NULLS LAST, t2.x NULLS LAST; +1 42 \N \N \N 0 +2 2 2 2 1 1 +3 3 3 33 1 1 +\N \N 4 42 \N 0 +\N \N \N \N \N 1 +SELECT *, e, e2 FROM t1 FULL JOIN t2 ON ( ( ((t1.x == t2.x) AS e) AND ((t2.x IS NOT NULL) AND (t1.x IS NOT NULL)) ) AS e2 ) ORDER BY t1.x NULLS LAST, t2.x NULLS LAST; +1 42 \N \N \N 0 +2 2 2 2 1 1 +3 3 3 33 1 1 +\N \N 4 42 \N 0 +\N \N \N \N \N 0 +\N \N \N \N \N 0 -- check for non-nullable columns for which `is null` is replaced with constant SELECT * FROM t1n as t1 JOIN t2n as t2 ON (t1.x == t2.x AND ((t2.x IS NOT NULL) AND (t1.x IS NOT NULL)) ) OR ( (t2.x IS NULL) AND (t1.x IS NULL) ) ORDER BY t1.x NULLS LAST; 2 2 2 2 diff --git a/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.sql b/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.sql index f7813e2a1b4..67918f4302f 100644 --- a/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.sql +++ b/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.sql @@ -36,9 +36,8 @@ SELECT x = y OR (x IS NULL AND y IS NULL) FROM t1 ORDER BY x NULLS LAST; SELECT * FROM t1 JOIN t2 ON (t1.x == t2.x AND ((t2.x IS NOT NULL) AND (t1.x IS NOT NULL)) ) OR ( (t2.x IS NULL) AND (t1.x IS NULL) ) ORDER BY t1.x NULLS LAST; -- aliases defined in the join condition are valid --- FIXME(@vdimir) broken query formatting for the following queries: --- SELECT *, e, e2 FROM t1 FULL JOIN t2 ON ( ( ((t1.x == t2.x) AS e) AND ((t2.x IS NOT NULL) AND (t1.x IS NOT NULL)) ) OR ( (t2.x IS NULL) AND (t1.x IS NULL) ) AS e2 ) ORDER BY t1.x NULLS LAST, t2.x NULLS LAST; --- SELECT *, e, e2 FROM t1 FULL JOIN t2 ON ( ( ((t1.x == t2.x) AS e) AND ((t2.x IS NOT NULL) AND (t1.x IS NOT NULL)) ) AS e2 ) ORDER BY t1.x NULLS LAST, t2.x NULLS LAST; +SELECT *, e, e2 FROM t1 FULL JOIN t2 
ON ( ( ((t1.x == t2.x) AS e) AND ((t2.x IS NOT NULL) AND (t1.x IS NOT NULL)) ) OR ( (t2.x IS NULL) AND (t1.x IS NULL) ) AS e2 ) ORDER BY t1.x NULLS LAST, t2.x NULLS LAST; +SELECT *, e, e2 FROM t1 FULL JOIN t2 ON ( ( ((t1.x == t2.x) AS e) AND ((t2.x IS NOT NULL) AND (t1.x IS NOT NULL)) ) AS e2 ) ORDER BY t1.x NULLS LAST, t2.x NULLS LAST; -- check for non-nullable columns for which `is null` is replaced with constant SELECT * FROM t1n as t1 JOIN t2n as t2 ON (t1.x == t2.x AND ((t2.x IS NOT NULL) AND (t1.x IS NOT NULL)) ) OR ( (t2.x IS NULL) AND (t1.x IS NULL) ) ORDER BY t1.x NULLS LAST; From 6e762d404456debca4ee2d5ccce94deb32c3fbad Mon Sep 17 00:00:00 2001 From: vdimir Date: Wed, 10 Jul 2024 08:57:33 +0000 Subject: [PATCH 030/661] Fix aliased JOIN ON expression formatting --- src/Parsers/ASTTablesInSelectQuery.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/Parsers/ASTTablesInSelectQuery.cpp b/src/Parsers/ASTTablesInSelectQuery.cpp index d22a4eca0fc..dbb2a008bae 100644 --- a/src/Parsers/ASTTablesInSelectQuery.cpp +++ b/src/Parsers/ASTTablesInSelectQuery.cpp @@ -235,7 +235,12 @@ void ASTTableJoin::formatImplAfterTable(const FormatSettings & settings, FormatS else if (on_expression) { settings.ostr << (settings.hilite ? hilite_keyword : "") << " ON " << (settings.hilite ? hilite_none : ""); + bool on_has_alias = !on_expression->tryGetAlias().empty(); + if (on_has_alias) + settings.ostr << "("; on_expression->formatImpl(settings, state, frame); + if (on_has_alias) + settings.ostr << ")"; } } From 96d063bcc39712c5a21a8e51244a9e216af8536a Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Thu, 11 Jul 2024 12:10:44 +0800 Subject: [PATCH 031/661] renmae ut files --- src/Functions/printf.cpp | 118 +++++++++++------- ...erence => 03203_function_printf.reference} | 0 ...0_printf.sql => 03203_function_printf.sql} | 0 3 files changed, 73 insertions(+), 45 deletions(-) rename tests/queries/0_stateless/{032010_printf.reference => 03203_function_printf.reference} (100%) rename tests/queries/0_stateless/{032010_printf.sql => 03203_function_printf.sql} (100%) diff --git a/src/Functions/printf.cpp b/src/Functions/printf.cpp index c7c6bd228a7..247c4a65daf 100644 --- a/src/Functions/printf.cpp +++ b/src/Functions/printf.cpp @@ -40,7 +40,7 @@ private: bool is_literal; /// format is literal string without any argument ColumnWithTypeAndName input; /// Only used when is_literal is false - ColumnWithTypeAndName execute() + ColumnWithTypeAndName execute() const { if (is_literal) return executeLiteral(format); @@ -50,7 +50,7 @@ private: return executeNonconstant(input); } - String toString() const + [[maybe_unused]] String toString() const { std::ostringstream oss; // STYLE_CHECK_ALLOW_STD_STRING_STREAM oss << "format:" << format << ", rows:" << rows << ", is_literal:" << is_literal << ", input:" << input.dumpStructure() @@ -59,7 +59,7 @@ private: } private: - ColumnWithTypeAndName executeLiteral(std::string_view literal) + ColumnWithTypeAndName executeLiteral(std::string_view literal) const { ColumnWithTypeAndName res; auto str_col = ColumnString::create(); @@ -69,7 +69,7 @@ private: return res; } - ColumnWithTypeAndName executeConstant(const ColumnWithTypeAndName & arg) + ColumnWithTypeAndName executeConstant(const ColumnWithTypeAndName & arg) const { ColumnWithTypeAndName tmp_arg = arg; const auto & const_col = static_cast(*arg.column); @@ -79,57 +79,85 @@ private: return ColumnWithTypeAndName{ColumnConst::create(tmp_res.column, arg.column->size()), tmp_res.type, tmp_res.name}; } - 
ColumnWithTypeAndName executeNonconstant(const ColumnWithTypeAndName & arg) + template + bool executeNumber(const IColumn & column, ColumnString::Chars & res_chars, ColumnString::Offsets & res_offsets) const + { + const ColumnVector * concrete_column = checkAndGetColumn>(&column); + if (!concrete_column) + return false; + + String s; + size_t curr_offset = 0; + const auto & data = concrete_column->getData(); + for (size_t i = 0; i < data.size(); ++i) + { + T a = data[i]; + s = fmt::sprintf(format, static_cast>(a)); + memcpy(&res_chars[curr_offset], s.data(), s.size()); + res_chars[curr_offset + s.size()] = 0; + + curr_offset += s.size() + 1; + res_offsets[i] = curr_offset; + } + return true; + } + + template + bool executeString(const IColumn & column, ColumnString::Chars & res_chars, ColumnString::Offsets & res_offsets) const + { + const COLUMN * concrete_column = checkAndGetColumn(&column); + if (!concrete_column) + return false; + + String s; + size_t curr_offset = 0; + for (size_t i = 0; i < concrete_column->size(); ++i) + { + auto a = concrete_column->getDataAt(i).toView(); + s = fmt::sprintf(format, a); + memcpy(&res_chars[curr_offset], s.data(), s.size()); + res_chars[curr_offset + s.size()] = 0; + + curr_offset += s.size() + 1; + res_offsets[i] = curr_offset; + } + return true; + } + + ColumnWithTypeAndName executeNonconstant(const ColumnWithTypeAndName & arg) const { size_t size = arg.column->size(); auto res_col = ColumnString::create(); auto & res_str = static_cast(*res_col); auto & res_offsets = res_str.getOffsets(); auto & res_chars = res_str.getChars(); - res_offsets.reserve_exact(size); - res_chars.reserve(format.size() * size * 2); + res_offsets.resize_exact(size); + res_chars.reserve(format.size() * size); - String s; WhichDataType which(arg.type); - -#define EXECUTE_BY_TYPE(IS_TYPE, GET_TYPE) \ - else if (which.IS_TYPE()) \ - { \ - for (size_t i = 0; i < size; ++i) \ - { \ - auto a = arg.column->GET_TYPE(i); \ - s = fmt::sprintf(format, a); \ - res_str.insertData(s.data(), s.size()); \ - } \ - } - - if (false) - ; - EXECUTE_BY_TYPE(isNativeInt, getInt) - EXECUTE_BY_TYPE(isNativeUInt, getUInt) - EXECUTE_BY_TYPE(isFloat32, getFloat32) - EXECUTE_BY_TYPE(isFloat64, getFloat64) - else if (which.isStringOrFixedString()) + if (which.isNativeNumber() + && (executeNumber(*arg.column, res_chars, res_offsets) || executeNumber(*arg.column, res_chars, res_offsets) + || executeNumber(*arg.column, res_chars, res_offsets) + || executeNumber(*arg.column, res_chars, res_offsets) + || executeNumber(*arg.column, res_chars, res_offsets) || executeNumber(*arg.column, res_chars, res_offsets) + || executeNumber(*arg.column, res_chars, res_offsets) + || executeNumber(*arg.column, res_chars, res_offsets))) { - for (size_t i = 0; i < size; ++i) - { - auto a = arg.column->getDataAt(i).toView(); - s = fmt::sprintf(format, a); - res_str.insertData(s.data(), s.size()); - } + return {std::move(res_col), std::make_shared(), arg.name}; } - else throw Exception( - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "The argument type of function {} is {}, but native numeric or string type is expected", - FunctionPrintf::name, - arg.type->getName()); -#undef EXECUTE_BY_TYPE - - ColumnWithTypeAndName res; - res.name = arg.name; - res.type = std::make_shared(); - res.column = std::move(res_col); - return res; + else if ( + which.isStringOrFixedString() + && (executeString(*arg.column, res_chars, res_offsets) + || executeString(*arg.column, res_chars, res_offsets))) + { + return {std::move(res_col), std::make_shared(), 
arg.name}; + } + else + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "The argument type of function {} is {}, but native numeric or string type is expected", + FunctionPrintf::name, + arg.type->getName()); } }; diff --git a/tests/queries/0_stateless/032010_printf.reference b/tests/queries/0_stateless/03203_function_printf.reference similarity index 100% rename from tests/queries/0_stateless/032010_printf.reference rename to tests/queries/0_stateless/03203_function_printf.reference diff --git a/tests/queries/0_stateless/032010_printf.sql b/tests/queries/0_stateless/03203_function_printf.sql similarity index 100% rename from tests/queries/0_stateless/032010_printf.sql rename to tests/queries/0_stateless/03203_function_printf.sql From 88851ddb569f9ae8c61420bde99d2ad5f3d76889 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Thu, 11 Jul 2024 12:15:44 +0800 Subject: [PATCH 032/661] improve uts --- .../0_stateless/03203_function_printf.reference | 5 +++++ .../queries/0_stateless/03203_function_printf.sql | 14 ++++++++++++-- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/03203_function_printf.reference b/tests/queries/0_stateless/03203_function_printf.reference index 58501cbd0fc..338ecb0183d 100644 --- a/tests/queries/0_stateless/03203_function_printf.reference +++ b/tests/queries/0_stateless/03203_function_printf.reference @@ -14,3 +14,8 @@ 1 1 1 +1 +1 +1 +1 +1 diff --git a/tests/queries/0_stateless/03203_function_printf.sql b/tests/queries/0_stateless/03203_function_printf.sql index 58fe081e499..c41cbf0b5e9 100644 --- a/tests/queries/0_stateless/03203_function_printf.sql +++ b/tests/queries/0_stateless/03203_function_printf.sql @@ -19,6 +19,16 @@ select printf('%%A: %A', 123.456) = '%A: 0X1.EDD2F1A9FBE77P+6'; -- Testing character formats select printf('%%s: %s', 'abc') = '%s: abc'; - -- Testing the %% specifier -select printf('%%%%: %%') = '%%: %'; \ No newline at end of file +select printf('%%%%: %%') = '%%: %'; + +-- Testing integer formats with precision +select printf('%%.5d: %.5d', 123) = '%.5d: 00123'; + +-- Testing floating point formats with precision +select printf('%%.2f: %.2f', 123.456) = '%.2f: 123.46'; +select printf('%%.2e: %.2e', 123.456) = '%.2e: 1.23e+02'; +select printf('%%.2g: %.2g', 123.456) = '%.2g: 1.2e+02'; + +-- Testing character formats with precision +select printf('%%.2s: %.2s', 'abc') = '%.2s: ab'; \ No newline at end of file From d988399aa68ad8a6bc412bfc48d9fdefe63c1657 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Thu, 11 Jul 2024 14:32:24 +0800 Subject: [PATCH 033/661] fix failed uts --- src/Functions/printf.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/Functions/printf.cpp b/src/Functions/printf.cpp index 247c4a65daf..a890b886338 100644 --- a/src/Functions/printf.cpp +++ b/src/Functions/printf.cpp @@ -141,7 +141,9 @@ private: || executeNumber(*arg.column, res_chars, res_offsets) || executeNumber(*arg.column, res_chars, res_offsets) || executeNumber(*arg.column, res_chars, res_offsets) || executeNumber(*arg.column, res_chars, res_offsets) - || executeNumber(*arg.column, res_chars, res_offsets))) + || executeNumber(*arg.column, res_chars, res_offsets) + || executeNumber(*arg.column, res_chars, res_offsets) + || executeNumber(*arg.column, res_chars, res_offsets))) { return {std::move(res_col), std::make_shared(), arg.name}; } From 21ca5f2d65c936a2c5b5fbc8f3f0c40d0ce60a6a Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Thu, 
11 Jul 2024 15:32:05 +0800 Subject: [PATCH 034/661] fix failed ut --- .../02415_all_new_functions_must_be_documented.reference | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference index a152066a460..873b6bbb660 100644 --- a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference +++ b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference @@ -562,6 +562,7 @@ positionCaseInsensitive positionCaseInsensitiveUTF8 positionUTF8 pow +printf proportionsZTest protocol queryID From c6b558c7915b070167649d4e88eafb2613570bd3 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Thu, 4 Jul 2024 22:30:18 +0200 Subject: [PATCH 035/661] Done --- .../02814_currentDatabase_for_table_functions.sql | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02814_currentDatabase_for_table_functions.sql b/tests/queries/0_stateless/02814_currentDatabase_for_table_functions.sql index 74b5cf5f432..8b1e3ba1e10 100644 --- a/tests/queries/0_stateless/02814_currentDatabase_for_table_functions.sql +++ b/tests/queries/0_stateless/02814_currentDatabase_for_table_functions.sql @@ -13,7 +13,13 @@ CREATE MATERIALIZED VIEW null_mv Engine = Log AS SELECT * FROM null_table LEFT J CREATE TABLE null_table_buffer (number UInt64) ENGINE = Buffer(currentDatabase(), null_table, 1, 1, 1, 100, 200, 10000, 20000); INSERT INTO null_table_buffer VALUES (1); -SELECT sleep(3) FORMAT Null; + +-- OPTIMIZE query should flush Buffer table, but still it is not guaranteed +-- (see the comment StorageBuffer::optimize) +-- But the combination of OPTIMIZE + sleep + OPTIMIZE should be enough. +OPTIMIZE TABLE null_table_buffer; +SELECT sleep(1) FORMAT Null; +OPTIMIZE TABLE null_table_buffer; -- Insert about should've landed into `null_mv` SELECT count() FROM null_mv; From ca7e003c6d7af6bf0676bba7cb61ab560c202bf3 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Fri, 12 Jul 2024 15:27:03 +0000 Subject: [PATCH 036/661] Fixed test --- .../02814_currentDatabase_for_table_functions.reference | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02814_currentDatabase_for_table_functions.reference b/tests/queries/0_stateless/02814_currentDatabase_for_table_functions.reference index 7ff95106d3d..20b14d9a67b 100644 --- a/tests/queries/0_stateless/02814_currentDatabase_for_table_functions.reference +++ b/tests/queries/0_stateless/02814_currentDatabase_for_table_functions.reference @@ -11,7 +11,12 @@ CREATE VIEW number_view as SELECT * FROM numbers(10) as tb; CREATE MATERIALIZED VIEW null_mv Engine = Log AS SELECT * FROM null_table LEFT JOIN number_view as tb USING number; CREATE TABLE null_table_buffer (number UInt64) ENGINE = Buffer(currentDatabase(), null_table, 1, 1, 1, 100, 200, 10000, 20000); INSERT INTO null_table_buffer VALUES (1); -SELECT sleep(3) FORMAT Null; +-- OPTIMIZE query should flush Buffer table, but still it is not guaranteed +-- (see the comment StorageBuffer::optimize) +-- But the combination of OPTIMIZE + sleep + OPTIMIZE should be enough. 
+OPTIMIZE TABLE null_table_buffer; +SELECT sleep(1) FORMAT Null; +OPTIMIZE TABLE null_table_buffer; -- Insert about should've landed into `null_mv` SELECT count() FROM null_mv; 1 From a6e737ef2afc7fb18d661295e6f84cc3e0478ae1 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 12 Jul 2024 17:19:30 +0000 Subject: [PATCH 037/661] Cleaner FilterDAGInfo. --- src/Interpreters/ActionsDAG.cpp | 25 +++++++------- src/Interpreters/ActionsDAG.h | 2 +- src/Interpreters/ExpressionActions.cpp | 2 +- src/Interpreters/ExpressionAnalyzer.cpp | 2 +- src/Interpreters/InterpreterSelectQuery.cpp | 18 +++++------ src/Planner/Planner.cpp | 4 +-- src/Planner/PlannerJoinTree.cpp | 36 +++++++++------------ src/Storages/IStorage.cpp | 6 ++-- src/Storages/MergeTree/MergeTreeData.cpp | 2 +- src/Storages/SelectQueryInfo.h | 2 +- src/Storages/StorageMerge.cpp | 6 ++-- 11 files changed, 49 insertions(+), 56 deletions(-) diff --git a/src/Interpreters/ActionsDAG.cpp b/src/Interpreters/ActionsDAG.cpp index 04be9d23c32..4401c83549f 100644 --- a/src/Interpreters/ActionsDAG.cpp +++ b/src/Interpreters/ActionsDAG.cpp @@ -1249,31 +1249,30 @@ bool ActionsDAG::removeUnusedResult(const std::string & column_name) ActionsDAGPtr ActionsDAG::clone(const ActionsDAG * from) { std::unordered_map old_to_new_nodes; - return ActionsDAG::clone(from, old_to_new_nodes); + if (from == nullptr) + return nullptr; + return std::make_unique(ActionsDAG::clone(*from, old_to_new_nodes)); } -ActionsDAGPtr ActionsDAG::clone(const ActionsDAG * from, std::unordered_map & old_to_new_nodes) +ActionsDAG ActionsDAG::clone(const ActionsDAG & from, std::unordered_map & old_to_new_nodes) { - if (!from) - return nullptr; + ActionsDAG actions; - auto actions = std::make_unique(); - - for (const auto & node : from->nodes) + for (const auto & node : from.nodes) { - auto & copy_node = actions->nodes.emplace_back(node); + auto & copy_node = actions.nodes.emplace_back(node); old_to_new_nodes[&node] = ©_node; } - for (auto & node : actions->nodes) + for (auto & node : actions.nodes) for (auto & child : node.children) child = old_to_new_nodes[child]; - for (const auto & output_node : from->outputs) - actions->outputs.push_back(old_to_new_nodes[output_node]); + for (const auto & output_node : from.outputs) + actions.outputs.push_back(old_to_new_nodes[output_node]); - for (const auto & input_node : from->inputs) - actions->inputs.push_back(old_to_new_nodes[input_node]); + for (const auto & input_node : from.inputs) + actions.inputs.push_back(old_to_new_nodes[input_node]); return actions; } diff --git a/src/Interpreters/ActionsDAG.h b/src/Interpreters/ActionsDAG.h index cf6a91b9fe7..f428ca2f01c 100644 --- a/src/Interpreters/ActionsDAG.h +++ b/src/Interpreters/ActionsDAG.h @@ -263,7 +263,7 @@ public: static ActionsDAGPtr clone(const ActionsDAGPtr & from) { return clone(from.get()); } static ActionsDAGPtr clone(const ActionsDAG * from); - static ActionsDAGPtr clone(const ActionsDAG * from, std::unordered_map & old_to_new_nodes); + static ActionsDAG clone(const ActionsDAG & from, std::unordered_map & old_to_new_nodes); static ActionsDAGPtr cloneSubDAG(const NodeRawConstPtrs & outputs, bool remove_aliases); diff --git a/src/Interpreters/ExpressionActions.cpp b/src/Interpreters/ExpressionActions.cpp index 1c6c3f2556b..dd1d2eb703e 100644 --- a/src/Interpreters/ExpressionActions.cpp +++ b/src/Interpreters/ExpressionActions.cpp @@ -75,7 +75,7 @@ ExpressionActionsPtr ExpressionActions::clone() const auto copy = std::make_shared(ExpressionActions()); std::unordered_map copy_map; 
- copy->actions_dag = std::move(*ActionsDAG::clone(&actions_dag, copy_map)); + copy->actions_dag = ActionsDAG::clone(actions_dag, copy_map); copy->actions = actions; for (auto & action : copy->actions) action.node = copy_map[action.node]; diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index 068b6f290fa..286eda14b3f 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -1922,7 +1922,7 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( if (storage && additional_filter) { - Names columns_for_additional_filter = additional_filter->actions->getRequiredColumnsNames(); + Names columns_for_additional_filter = additional_filter->actions.getRequiredColumnsNames(); additional_required_columns_after_prewhere.insert(additional_required_columns_after_prewhere.end(), columns_for_additional_filter.begin(), columns_for_additional_filter.end()); } diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index cde6e305005..e723e5f7982 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -178,12 +178,12 @@ FilterDAGInfoPtr generateFilterActions( filter_info->actions = std::move(analyzer.simpleSelectActions()->dag); filter_info->column_name = expr_list->children.at(0)->getColumnName(); - filter_info->actions->removeUnusedActions(NameSet{filter_info->column_name}); + filter_info->actions.removeUnusedActions(NameSet{filter_info->column_name}); - for (const auto * node : filter_info->actions->getInputs()) - filter_info->actions->getOutputs().push_back(node); + for (const auto * node : filter_info->actions.getInputs()) + filter_info->actions.getOutputs().push_back(node); - auto required_columns_from_filter = filter_info->actions->getRequiredColumns(); + auto required_columns_from_filter = filter_info->actions.getRequiredColumns(); for (const auto & column : required_columns_from_filter) { @@ -1486,7 +1486,7 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional

( query_plan.getCurrentDataStream(), - std::move(*ActionsDAG::clone(&*expressions.filter_info->actions)), + std::move(*ActionsDAG::clone(&expressions.filter_info->actions)), expressions.filter_info->column_name, expressions.filter_info->do_remove_column); @@ -1612,7 +1612,7 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional

( query_plan.getCurrentDataStream(), - std::move(*ActionsDAG::clone(&*expressions.filter_info->actions)), + std::move(*ActionsDAG::clone(&expressions.filter_info->actions)), expressions.filter_info->column_name, expressions.filter_info->do_remove_column); @@ -1620,11 +1620,11 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional

( query_plan.getCurrentDataStream(), - std::move(*ActionsDAG::clone(&*new_filter_info->actions)), + std::move(new_filter_info->actions), new_filter_info->column_name, new_filter_info->do_remove_column); @@ -2107,7 +2107,7 @@ void InterpreterSelectQuery::applyFiltersToPrewhereInAnalysis(ExpressionAnalysis else { /// Add row level security actions to prewhere. - analysis.prewhere_info->row_level_filter = std::move(*analysis.filter_info->actions); + analysis.prewhere_info->row_level_filter = std::move(analysis.filter_info->actions); analysis.prewhere_info->row_level_column_name = std::move(analysis.filter_info->column_name); analysis.filter_info = nullptr; } diff --git a/src/Planner/Planner.cpp b/src/Planner/Planner.cpp index 0b10cef82ce..ffed19185d3 100644 --- a/src/Planner/Planner.cpp +++ b/src/Planner/Planner.cpp @@ -1128,11 +1128,11 @@ void addAdditionalFilterStepIfNeeded(QueryPlan & query_plan, auto fake_table_expression = std::make_shared(std::move(storage), query_context); auto filter_info = buildFilterInfo(additional_result_filter_ast, fake_table_expression, planner_context, std::move(fake_name_set)); - if (!filter_info.actions || !query_plan.isInitialized()) + if (!query_plan.isInitialized()) return; auto filter_step = std::make_unique(query_plan.getCurrentDataStream(), - std::move(*filter_info.actions), + std::move(filter_info.actions), filter_info.column_name, filter_info.do_remove_column); filter_step->setStepDescription("additional result filter"); diff --git a/src/Planner/PlannerJoinTree.cpp b/src/Planner/PlannerJoinTree.cpp index fa3a3483a8e..3217d3461d3 100644 --- a/src/Planner/PlannerJoinTree.cpp +++ b/src/Planner/PlannerJoinTree.cpp @@ -458,7 +458,7 @@ void updatePrewhereOutputsIfNeeded(SelectQueryInfo & table_expression_query_info prewhere_outputs.insert(prewhere_outputs.end(), required_output_nodes.begin(), required_output_nodes.end()); } -FilterDAGInfo buildRowPolicyFilterIfNeeded(const StoragePtr & storage, +std::optional buildRowPolicyFilterIfNeeded(const StoragePtr & storage, SelectQueryInfo & table_expression_query_info, PlannerContextPtr & planner_context, std::set & used_row_policies) @@ -479,7 +479,7 @@ FilterDAGInfo buildRowPolicyFilterIfNeeded(const StoragePtr & storage, return buildFilterInfo(row_policy_filter->expression, table_expression_query_info.table_expression, planner_context); } -FilterDAGInfo buildCustomKeyFilterIfNeeded(const StoragePtr & storage, +std::optional buildCustomKeyFilterIfNeeded(const StoragePtr & storage, SelectQueryInfo & table_expression_query_info, PlannerContextPtr & planner_context) { @@ -513,7 +513,7 @@ FilterDAGInfo buildCustomKeyFilterIfNeeded(const StoragePtr & storage, } /// Apply filters from additional_table_filters setting -FilterDAGInfo buildAdditionalFiltersIfNeeded(const StoragePtr & storage, +std::optional buildAdditionalFiltersIfNeeded(const StoragePtr & storage, const String & table_expression_alias, SelectQueryInfo & table_expression_query_info, PlannerContextPtr & planner_context) @@ -789,9 +789,6 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres std::vector> where_filters; const auto add_filter = [&](FilterDAGInfo & filter_info, std::string description) { - if (!filter_info.actions) - return; - bool is_final = table_expression_query_info.table_expression_modifiers && table_expression_query_info.table_expression_modifiers->hasFinal(); bool optimize_move_to_prewhere @@ -805,14 +802,14 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres if 
(!prewhere_info->prewhere_actions) { - prewhere_info->prewhere_actions = std::move(*filter_info.actions); + prewhere_info->prewhere_actions = std::move(filter_info.actions); prewhere_info->prewhere_column_name = filter_info.column_name; prewhere_info->remove_prewhere_column = filter_info.do_remove_column; prewhere_info->need_filter = true; } else if (!prewhere_info->row_level_filter) { - prewhere_info->row_level_filter = std::move(*filter_info.actions); + prewhere_info->row_level_filter = std::move(filter_info.actions); prewhere_info->row_level_column_name = filter_info.column_name; prewhere_info->need_filter = true; } @@ -830,17 +827,18 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres auto row_policy_filter_info = buildRowPolicyFilterIfNeeded(storage, table_expression_query_info, planner_context, used_row_policies); - if (row_policy_filter_info.actions) - table_expression_data.setRowLevelFilterActions(ActionsDAG::clone(&*row_policy_filter_info.actions)); - add_filter(row_policy_filter_info, "Row-level security filter"); + if (row_policy_filter_info) + { + table_expression_data.setRowLevelFilterActions(ActionsDAG::clone(&row_policy_filter_info->actions)); + add_filter(*row_policy_filter_info, "Row-level security filter"); + } if (query_context->getParallelReplicasMode() == Context::ParallelReplicasMode::CUSTOM_KEY) { if (settings.parallel_replicas_count > 1) { - auto parallel_replicas_custom_key_filter_info - = buildCustomKeyFilterIfNeeded(storage, table_expression_query_info, planner_context); - add_filter(parallel_replicas_custom_key_filter_info, "Parallel replicas custom key filter"); + if (auto parallel_replicas_custom_key_filter_info= buildCustomKeyFilterIfNeeded(storage, table_expression_query_info, planner_context)) + add_filter(*parallel_replicas_custom_key_filter_info, "Parallel replicas custom key filter"); } else if (auto * distributed = typeid_cast(storage.get()); distributed && query_context->canUseParallelReplicasCustomKey(*distributed->getCluster())) @@ -850,9 +848,8 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres } const auto & table_expression_alias = table_expression->getOriginalAlias(); - auto additional_filters_info - = buildAdditionalFiltersIfNeeded(storage, table_expression_alias, table_expression_query_info, planner_context); - add_filter(additional_filters_info, "additional filter"); + if (auto additional_filters_info = buildAdditionalFiltersIfNeeded(storage, table_expression_alias, table_expression_query_info, planner_context)) + add_filter(*additional_filters_info, "additional filter"); from_stage = storage->getQueryProcessingStage( query_context, select_query_options.to_stage, storage_snapshot, table_expression_query_info); @@ -967,11 +964,10 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres for (auto && [filter_info, description] : where_filters) { if (query_plan.isInitialized() && - from_stage == QueryProcessingStage::FetchColumns && - filter_info.actions) + from_stage == QueryProcessingStage::FetchColumns) { auto filter_step = std::make_unique(query_plan.getCurrentDataStream(), - std::move(*filter_info.actions), + std::move(filter_info.actions), filter_info.column_name, filter_info.do_remove_column); filter_step->setStepDescription(description); diff --git a/src/Storages/IStorage.cpp b/src/Storages/IStorage.cpp index 57f79a2cd7f..4164608b4b5 100644 --- a/src/Storages/IStorage.cpp +++ b/src/Storages/IStorage.cpp @@ -340,10 +340,8 @@ std::string 
FilterDAGInfo::dump() const WriteBufferFromOwnString ss; ss << "FilterDAGInfo for column '" << column_name <<"', do_remove_column " << do_remove_column << "\n"; - if (actions) - { - ss << "actions " << actions->dumpDAG() << "\n"; - } + + ss << "actions " << actions.dumpDAG() << "\n"; return ss.str(); } diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 334c8c9c5ac..88fb52a94f2 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -7058,7 +7058,7 @@ ActionDAGNodes MergeTreeData::getFiltersForPrimaryKeyAnalysis(const InterpreterS ActionDAGNodes filter_nodes; if (auto additional_filter_info = select.getAdditionalQueryInfo()) - filter_nodes.nodes.push_back(&additional_filter_info->actions->findInOutputs(additional_filter_info->column_name)); + filter_nodes.nodes.push_back(&additional_filter_info->actions.findInOutputs(additional_filter_info->column_name)); if (before_where) filter_nodes.nodes.push_back(&before_where->dag.findInOutputs(where_column_name)); diff --git a/src/Storages/SelectQueryInfo.h b/src/Storages/SelectQueryInfo.h index 5276870c037..97b36115dfd 100644 --- a/src/Storages/SelectQueryInfo.h +++ b/src/Storages/SelectQueryInfo.h @@ -93,7 +93,7 @@ struct FilterInfo /// Same as FilterInfo, but with ActionsDAG. struct FilterDAGInfo { - std::optional actions; + ActionsDAG actions; String column_name; bool do_remove_column = false; diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index 374abd0b0a5..18e194491b8 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -407,7 +407,7 @@ void ReadFromMerge::addFilter(FilterDAGInfo filter) { output_stream->header = FilterTransform::transformHeader( output_stream->header, - filter.actions ? &*filter.actions : nullptr, + &filter.actions, filter.column_name, filter.do_remove_column); pushed_down_filters.push_back(std::move(filter)); @@ -662,7 +662,7 @@ std::vector ReadFromMerge::createChildrenPlans(SelectQ { auto filter_step = std::make_unique( child.plan.getCurrentDataStream(), - std::move(*ActionsDAG::clone(&*filter_info.actions)), + std::move(*ActionsDAG::clone(&filter_info.actions)), filter_info.column_name, filter_info.do_remove_column); @@ -1565,7 +1565,7 @@ bool ReadFromMerge::requestReadingInOrder(InputOrderInfoPtr order_info_) void ReadFromMerge::applyFilters(ActionDAGNodes added_filter_nodes) { for (const auto & filter_info : pushed_down_filters) - added_filter_nodes.nodes.push_back(&filter_info.actions->findInOutputs(filter_info.column_name)); + added_filter_nodes.nodes.push_back(&filter_info.actions.findInOutputs(filter_info.column_name)); SourceStepWithFilter::applyFilters(added_filter_nodes); From fb7cf4ab93c991b3e2cd8a3e3e1c6cecf574b936 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 12 Jul 2024 17:46:03 +0000 Subject: [PATCH 038/661] Better. 
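This continues the ActionsDAGPtr -> ActionsDAG cleanup from the previous commit:
clone() becomes a const member function returning an ActionsDAG by value (with an
overload that fills an old-to-new node map so callers can remap node pointers they
captured into the original DAG), and TableExpressionData switches its filter,
prewhere-filter and row-level-filter members from ActionsDAGPtr to std::optional
holding the DAG by value.

A minimal, self-contained sketch of the clone-with-node-remapping idiom used here,
with hypothetical simplified Dag/Node types rather than the real ActionsDAG
interface:

    // Sketch only: hypothetical types, not the ClickHouse ActionsDAG API.
    #include <cassert>
    #include <list>
    #include <unordered_map>
    #include <vector>

    struct Node
    {
        std::vector<const Node *> children;
    };

    struct Dag
    {
        /// std::list keeps node addresses stable while copies are appended.
        std::list<Node> nodes;
        std::vector<const Node *> outputs;

        /// Deep copy returned by value; old_to_new lets the caller remap any
        /// pointers into the original DAG onto the corresponding copies.
        Dag clone(std::unordered_map<const Node *, Node *> & old_to_new) const
        {
            Dag copy;
            for (const auto & node : nodes)
                old_to_new[&node] = &copy.nodes.emplace_back(node);

            /// Copied nodes still point at the original children; remap them.
            for (auto & node : copy.nodes)
                for (auto & child : node.children)
                    child = old_to_new[child];

            for (const auto * output : outputs)
                copy.outputs.push_back(old_to_new[output]);

            return copy;
        }
    };

    int main()
    {
        Dag dag;
        Node & input = dag.nodes.emplace_back();
        Node & func = dag.nodes.emplace_back(Node{{&input}});
        dag.outputs.push_back(&func);

        std::unordered_map<const Node *, Node *> old_to_new;
        Dag copy = dag.clone(old_to_new);

        /// The copy is wired to its own nodes, not to the originals.
        assert(copy.outputs.front() == &copy.nodes.back());
        assert(copy.outputs.front()->children.front() == &copy.nodes.front());
        return 0;
    }

Returning the DAG by value instead of through a unique_ptr is what lets call sites
such as TableExpressionData express "no filter" with an optional rather than a
nullable pointer, which is what the hunks below switch to.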
--- src/Interpreters/ActionsDAG.cpp | 16 +++++++++++----- src/Interpreters/ActionsDAG.h | 4 +++- src/Interpreters/ExpressionActions.cpp | 2 +- src/Planner/CollectTableExpressionData.cpp | 10 +++++----- src/Planner/Planner.cpp | 4 ++-- src/Planner/PlannerJoinTree.cpp | 7 ++++--- src/Planner/TableExpressionData.h | 18 +++++++++--------- 7 files changed, 35 insertions(+), 26 deletions(-) diff --git a/src/Interpreters/ActionsDAG.cpp b/src/Interpreters/ActionsDAG.cpp index 4401c83549f..4f03a9e1602 100644 --- a/src/Interpreters/ActionsDAG.cpp +++ b/src/Interpreters/ActionsDAG.cpp @@ -1251,14 +1251,20 @@ ActionsDAGPtr ActionsDAG::clone(const ActionsDAG * from) std::unordered_map old_to_new_nodes; if (from == nullptr) return nullptr; - return std::make_unique(ActionsDAG::clone(*from, old_to_new_nodes)); + return std::make_unique(from->clone(old_to_new_nodes)); } -ActionsDAG ActionsDAG::clone(const ActionsDAG & from, std::unordered_map & old_to_new_nodes) +ActionsDAG ActionsDAG::clone() const +{ + std::unordered_map old_to_new_nodes; + return clone(old_to_new_nodes); +} + +ActionsDAG ActionsDAG::clone(std::unordered_map & old_to_new_nodes) const { ActionsDAG actions; - for (const auto & node : from.nodes) + for (const auto & node : nodes) { auto & copy_node = actions.nodes.emplace_back(node); old_to_new_nodes[&node] = ©_node; @@ -1268,10 +1274,10 @@ ActionsDAG ActionsDAG::clone(const ActionsDAG & from, std::unordered_map & old_to_new_nodes); + + ActionsDAG clone(std::unordered_map & old_to_new_nodes) const; + ActionsDAG clone() const; static ActionsDAGPtr cloneSubDAG(const NodeRawConstPtrs & outputs, bool remove_aliases); diff --git a/src/Interpreters/ExpressionActions.cpp b/src/Interpreters/ExpressionActions.cpp index dd1d2eb703e..113410b1480 100644 --- a/src/Interpreters/ExpressionActions.cpp +++ b/src/Interpreters/ExpressionActions.cpp @@ -75,7 +75,7 @@ ExpressionActionsPtr ExpressionActions::clone() const auto copy = std::make_shared(ExpressionActions()); std::unordered_map copy_map; - copy->actions_dag = ActionsDAG::clone(actions_dag, copy_map); + copy->actions_dag = actions_dag.clone(copy_map); copy->actions = actions; for (auto & action : copy->actions) action.node = copy_map[action.node]; diff --git a/src/Planner/CollectTableExpressionData.cpp b/src/Planner/CollectTableExpressionData.cpp index 162d3fe8d11..1d85476636c 100644 --- a/src/Planner/CollectTableExpressionData.cpp +++ b/src/Planner/CollectTableExpressionData.cpp @@ -335,22 +335,22 @@ void collectTableExpressionData(QueryTreeNodePtr & query_node, PlannerContextPtr collect_source_columns_visitor.setKeepAliasColumns(false); collect_source_columns_visitor.visit(query_node_typed.getPrewhere()); - auto prewhere_actions_dag = std::make_unique(); + ActionsDAG prewhere_actions_dag; QueryTreeNodePtr query_tree_node = query_node_typed.getPrewhere(); PlannerActionsVisitor visitor(planner_context, false /*use_column_identifier_as_action_node_name*/); - auto expression_nodes = visitor.visit(*prewhere_actions_dag, query_tree_node); + auto expression_nodes = visitor.visit(prewhere_actions_dag, query_tree_node); if (expression_nodes.size() != 1) throw Exception(ErrorCodes::ILLEGAL_PREWHERE, "Invalid PREWHERE. Expected single boolean expression. 
In query {}", query_node->formatASTForErrorMessage()); - prewhere_actions_dag->getOutputs().push_back(expression_nodes.back()); + prewhere_actions_dag.getOutputs().push_back(expression_nodes.back()); - for (const auto & prewhere_input_node : prewhere_actions_dag->getInputs()) + for (const auto & prewhere_input_node : prewhere_actions_dag.getInputs()) if (required_column_names_without_prewhere.contains(prewhere_input_node->result_name)) - prewhere_actions_dag->getOutputs().push_back(prewhere_input_node); + prewhere_actions_dag.getOutputs().push_back(prewhere_input_node); table_expression_data.setPrewhereFilterActions(std::move(prewhere_actions_dag)); } diff --git a/src/Planner/Planner.cpp b/src/Planner/Planner.cpp index ffed19185d3..9042303d0e4 100644 --- a/src/Planner/Planner.cpp +++ b/src/Planner/Planner.cpp @@ -1412,7 +1412,7 @@ void Planner::buildPlanForQueryNode() checkStoragesSupportTransactions(planner_context); const auto & table_filters = planner_context->getGlobalPlannerContext()->filters_for_table_expressions; - if (!select_query_options.only_analyze && !table_filters.empty()) // && top_level) + if (!select_query_options.only_analyze && !table_filters.empty()) { for (auto & [table_node, table_expression_data] : planner_context->getTableExpressionNodeToData()) { @@ -1420,7 +1420,7 @@ void Planner::buildPlanForQueryNode() if (it != table_filters.end()) { const auto & filters = it->second; - table_expression_data.setFilterActions(ActionsDAG::clone(&*filters.filter_actions)); + table_expression_data.setFilterActions(filters.filter_actions->clone()); table_expression_data.setPrewhereInfo(filters.prewhere_info); } } diff --git a/src/Planner/PlannerJoinTree.cpp b/src/Planner/PlannerJoinTree.cpp index 3217d3461d3..d55e5e99f71 100644 --- a/src/Planner/PlannerJoinTree.cpp +++ b/src/Planner/PlannerJoinTree.cpp @@ -646,7 +646,8 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres auto table_expression_query_info = select_query_info; table_expression_query_info.table_expression = table_expression; - table_expression_query_info.filter_actions_dag = ActionsDAG::clone(table_expression_data.getFilterActions()); + if (const auto & filter_actions = table_expression_data.getFilterActions()) + table_expression_query_info.filter_actions_dag = std::make_shared(filter_actions->clone()); table_expression_query_info.analyzer_can_use_parallel_replicas_on_follower = table_node == planner_context->getGlobalPlannerContext()->parallel_replicas_table; size_t max_streams = settings.max_threads; @@ -776,7 +777,7 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres if (prewhere_actions) { prewhere_info = std::make_shared(); - prewhere_info->prewhere_actions = std::move(*ActionsDAG::clone(prewhere_actions)); + prewhere_info->prewhere_actions = prewhere_actions->clone(); prewhere_info->prewhere_column_name = prewhere_actions->getOutputs().at(0)->result_name; prewhere_info->remove_prewhere_column = true; prewhere_info->need_filter = true; @@ -829,7 +830,7 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres = buildRowPolicyFilterIfNeeded(storage, table_expression_query_info, planner_context, used_row_policies); if (row_policy_filter_info) { - table_expression_data.setRowLevelFilterActions(ActionsDAG::clone(&row_policy_filter_info->actions)); + table_expression_data.setRowLevelFilterActions(row_policy_filter_info->actions.clone()); add_filter(*row_policy_filter_info, "Row-level security filter"); } diff --git 
a/src/Planner/TableExpressionData.h b/src/Planner/TableExpressionData.h index 9723a00a356..1d04fac3dc3 100644 --- a/src/Planner/TableExpressionData.h +++ b/src/Planner/TableExpressionData.h @@ -211,32 +211,32 @@ public: is_merge_tree = is_merge_tree_value; } - const ActionsDAGPtr & getPrewhereFilterActions() const + const std::optional & getPrewhereFilterActions() const { return prewhere_filter_actions; } - void setRowLevelFilterActions(ActionsDAGPtr row_level_filter_actions_value) + void setRowLevelFilterActions(ActionsDAG row_level_filter_actions_value) { row_level_filter_actions = std::move(row_level_filter_actions_value); } - const ActionsDAGPtr & getRowLevelFilterActions() const + const std::optional & getRowLevelFilterActions() const { return row_level_filter_actions; } - void setPrewhereFilterActions(ActionsDAGPtr prewhere_filter_actions_value) + void setPrewhereFilterActions(ActionsDAG prewhere_filter_actions_value) { prewhere_filter_actions = std::move(prewhere_filter_actions_value); } - const ActionsDAGPtr & getFilterActions() const + const std::optional & getFilterActions() const { return filter_actions; } - void setFilterActions(ActionsDAGPtr filter_actions_value) + void setFilterActions(ActionsDAG filter_actions_value) { filter_actions = std::move(filter_actions_value); } @@ -289,16 +289,16 @@ private: ColumnIdentifierToColumnName column_identifier_to_column_name; /// Valid for table, table function - ActionsDAGPtr filter_actions; + std::optional filter_actions; /// Valid for table, table function PrewhereInfoPtr prewhere_info; /// Valid for table, table function - ActionsDAGPtr prewhere_filter_actions; + std::optional prewhere_filter_actions; /// Valid for table, table function - ActionsDAGPtr row_level_filter_actions; + std::optional row_level_filter_actions; /// Is storage remote bool is_remote = false; From 264d7d760fedd3fc3c900d13ee1f7976887efaa7 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Sat, 13 Jul 2024 15:52:23 +0000 Subject: [PATCH 039/661] Bump rocksdb to 7.0.4 --- contrib/rocksdb | 2 +- contrib/rocksdb-cmake/CMakeLists.txt | 9 +++------ 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/contrib/rocksdb b/contrib/rocksdb index be366233921..4fc59e24001 160000 --- a/contrib/rocksdb +++ b/contrib/rocksdb @@ -1 +1 @@ -Subproject commit be366233921293bd07a84dc4ea6991858665f202 +Subproject commit 4fc59e240016a62180b09703e2938c3d7e928de0 diff --git a/contrib/rocksdb-cmake/CMakeLists.txt b/contrib/rocksdb-cmake/CMakeLists.txt index 3a14407166c..f6479346063 100644 --- a/contrib/rocksdb-cmake/CMakeLists.txt +++ b/contrib/rocksdb-cmake/CMakeLists.txt @@ -59,10 +59,8 @@ if(CMAKE_SYSTEM_PROCESSOR MATCHES "arm64|aarch64|AARCH64") # set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=armv8-a+crc+crypto -Wno-unused-function") endif() -set (HAVE_THREAD_LOCAL 1) -if(HAVE_THREAD_LOCAL) - add_definitions(-DROCKSDB_SUPPORT_THREAD_LOCAL) -endif() +# thread_local is part of C++11 and later (TODO: clean up this define) +add_definitions(-DROCKSDB_SUPPORT_THREAD_LOCAL) if(CMAKE_SYSTEM_NAME MATCHES "Darwin") add_definitions(-DOS_MACOSX) @@ -182,7 +180,6 @@ set(SOURCES ${ROCKSDB_SOURCE_DIR}/env/env.cc ${ROCKSDB_SOURCE_DIR}/env/env_chroot.cc ${ROCKSDB_SOURCE_DIR}/env/env_encryption.cc - ${ROCKSDB_SOURCE_DIR}/env/env_hdfs.cc ${ROCKSDB_SOURCE_DIR}/env/file_system.cc ${ROCKSDB_SOURCE_DIR}/env/file_system_tracer.cc ${ROCKSDB_SOURCE_DIR}/env/fs_remap.cc @@ -311,7 +308,6 @@ set(SOURCES ${ROCKSDB_SOURCE_DIR}/util/murmurhash.cc ${ROCKSDB_SOURCE_DIR}/util/random.cc 
${ROCKSDB_SOURCE_DIR}/util/rate_limiter.cc - ${ROCKSDB_SOURCE_DIR}/util/regex.cc ${ROCKSDB_SOURCE_DIR}/util/ribbon_config.cc ${ROCKSDB_SOURCE_DIR}/util/slice.cc ${ROCKSDB_SOURCE_DIR}/util/file_checksum_helper.cc @@ -335,6 +331,7 @@ set(SOURCES ${ROCKSDB_SOURCE_DIR}/utilities/checkpoint/checkpoint_impl.cc ${ROCKSDB_SOURCE_DIR}/utilities/compaction_filters.cc ${ROCKSDB_SOURCE_DIR}/utilities/compaction_filters/remove_emptyvalue_compactionfilter.cc + ${ROCKSDB_SOURCE_DIR}/utilities/counted_fs.cc ${ROCKSDB_SOURCE_DIR}/utilities/debug.cc ${ROCKSDB_SOURCE_DIR}/utilities/env_mirror.cc ${ROCKSDB_SOURCE_DIR}/utilities/env_timed.cc From fee7e22c1f60feb0a4c176355453caad18cd5bc1 Mon Sep 17 00:00:00 2001 From: xogoodnow Date: Sun, 14 Jul 2024 15:23:34 +0330 Subject: [PATCH 040/661] Changed the error code --- src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.cpp b/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.cpp index 4388864434e..87a44db573d 100644 --- a/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.cpp +++ b/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.cpp @@ -36,7 +36,7 @@ namespace ErrorCodes { extern const int LOGICAL_ERROR; extern const int BAD_ARGUMENTS; - extern const int QUERY_NOT_ALLOWED; + extern const int BAD_QUERY_PARAMETER; } namespace @@ -150,7 +150,7 @@ StorageObjectStorageQueue::StorageObjectStorageQueue( } else if (!configuration->isPathWithGlobs()) { - throw Exception(ErrorCodes::QUERY_NOT_ALLOWED, "ObjectStorageQueue url must either end with '/' or contain globs"); + throw Exception(ErrorCodes::BAD_QUERY_PARAMETER, "ObjectStorageQueue url must either end with '/' or contain globs"); } checkAndAdjustSettings(*queue_settings, engine_args, mode > LoadingStrictnessLevel::CREATE, log); From b5cb264b017e965037dbb0bd4623df5f5a65ec0b Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Sat, 13 Jul 2024 15:41:30 +0000 Subject: [PATCH 041/661] Bump ICU to 71 --- contrib/icu | 2 +- contrib/icu-cmake/CMakeLists.txt | 10 +++++----- contrib/icudata | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/contrib/icu b/contrib/icu index a56dde820dc..c205e7ee49a 160000 --- a/contrib/icu +++ b/contrib/icu @@ -1 +1 @@ -Subproject commit a56dde820dc35665a66f2e9ee8ba58e75049b668 +Subproject commit c205e7ee49a7086a28b9c275fcfdac9ca3dc815d diff --git a/contrib/icu-cmake/CMakeLists.txt b/contrib/icu-cmake/CMakeLists.txt index 0a650f2bcc0..f23b0002b8d 100644 --- a/contrib/icu-cmake/CMakeLists.txt +++ b/contrib/icu-cmake/CMakeLists.txt @@ -4,7 +4,9 @@ else () option(ENABLE_ICU "Enable ICU" 0) endif () -if (NOT ENABLE_ICU) +# Temporarily disabled s390x because the ICU build links a blob (icudt71b_dat.S) and our friends from IBM did not explain how they re-generated +# the blob on s390x: https://github.com/ClickHouse/icudata/pull/2#issuecomment-2226957255 +if (NOT ENABLE_ICU OR ARCH_S390X) message(STATUS "Not using ICU") return() endif() @@ -12,8 +14,6 @@ endif() set(ICU_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/icu/icu4c/source") set(ICUDATA_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/icudata/") -set (CMAKE_CXX_STANDARD 17) - # These lists of sources were generated from build log of the original ICU build system (configure + make). 
set(ICUUC_SOURCES @@ -462,9 +462,9 @@ file(GENERATE OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/empty.cpp" CONTENT " ") enable_language(ASM) if (ARCH_S390X) - set(ICUDATA_SOURCE_FILE "${ICUDATA_SOURCE_DIR}/icudt70b_dat.S" ) + set(ICUDATA_SOURCE_FILE "${ICUDATA_SOURCE_DIR}/icudt71b_dat.S" ) else() - set(ICUDATA_SOURCE_FILE "${ICUDATA_SOURCE_DIR}/icudt70l_dat.S" ) + set(ICUDATA_SOURCE_FILE "${ICUDATA_SOURCE_DIR}/icudt71l_dat.S" ) endif() set(ICUDATA_SOURCES diff --git a/contrib/icudata b/contrib/icudata index c8e717892a5..e7488edd1f1 160000 --- a/contrib/icudata +++ b/contrib/icudata @@ -1 +1 @@ -Subproject commit c8e717892a557b4d2852317c7d628aacc0a0e5ab +Subproject commit e7488edd1f141b0664553a985a6fcd0125279527 From 5b6956ea234962cca5414a3fb0a6191407b4305a Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Sat, 13 Jul 2024 16:48:09 +0000 Subject: [PATCH 042/661] Bump rocksdb to v7.10.2 --- contrib/rocksdb | 2 +- contrib/rocksdb-cmake/CMakeLists.txt | 53 +++++++++++++------------- contrib/rocksdb-cmake/build_version.cc | 31 +++++++++++---- 3 files changed, 51 insertions(+), 35 deletions(-) diff --git a/contrib/rocksdb b/contrib/rocksdb index 4fc59e24001..01e43568fa9 160000 --- a/contrib/rocksdb +++ b/contrib/rocksdb @@ -1 +1 @@ -Subproject commit 4fc59e240016a62180b09703e2938c3d7e928de0 +Subproject commit 01e43568fa9f3f7bf107b2b66c00b286b456f33e diff --git a/contrib/rocksdb-cmake/CMakeLists.txt b/contrib/rocksdb-cmake/CMakeLists.txt index f6479346063..98790158baa 100644 --- a/contrib/rocksdb-cmake/CMakeLists.txt +++ b/contrib/rocksdb-cmake/CMakeLists.txt @@ -5,6 +5,9 @@ if (NOT ENABLE_ROCKSDB) return() endif() +# not in original build system, otherwise xxHash.cc fails to compile with ClickHouse C++23 default +set (CMAKE_CXX_STANDARD 20) + # Always disable jemalloc for rocksdb by default because it introduces non-standard jemalloc APIs option(WITH_JEMALLOC "build with JeMalloc" OFF) @@ -16,14 +19,6 @@ option(WITH_LZ4 "build with lz4" ON) option(WITH_ZLIB "build with zlib" ON) option(WITH_ZSTD "build with zstd" ON) -# third-party/folly is only validated to work on Linux and Windows for now. -# So only turn it on there by default. 
-if(CMAKE_SYSTEM_NAME MATCHES "Linux|Windows") - option(WITH_FOLLY_DISTRIBUTED_MUTEX "build with folly::DistributedMutex" ON) -else() - option(WITH_FOLLY_DISTRIBUTED_MUTEX "build with folly::DistributedMutex" OFF) -endif() - if(WITH_SNAPPY) add_definitions(-DSNAPPY) list(APPEND THIRDPARTY_LIBS ch_contrib::snappy) @@ -44,7 +39,7 @@ if(WITH_ZSTD) list(APPEND THIRDPARTY_LIBS ch_contrib::zstd) endif() -option(PORTABLE "build a portable binary" ON) +add_definitions(-DROCKSDB_PORTABLE) if(ENABLE_SSE42 AND ENABLE_PCLMULQDQ) add_definitions(-DHAVE_SSE42) @@ -59,9 +54,6 @@ if(CMAKE_SYSTEM_PROCESSOR MATCHES "arm64|aarch64|AARCH64") # set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=armv8-a+crc+crypto -Wno-unused-function") endif() -# thread_local is part of C++11 and later (TODO: clean up this define) -add_definitions(-DROCKSDB_SUPPORT_THREAD_LOCAL) - if(CMAKE_SYSTEM_NAME MATCHES "Darwin") add_definitions(-DOS_MACOSX) elseif(CMAKE_SYSTEM_NAME MATCHES "Linux") @@ -87,19 +79,21 @@ set(ROCKSDB_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/rocksdb") include_directories(${ROCKSDB_SOURCE_DIR}) include_directories("${ROCKSDB_SOURCE_DIR}/include") -if(WITH_FOLLY_DISTRIBUTED_MUTEX) - include_directories("${ROCKSDB_SOURCE_DIR}/third-party/folly") -endif() set(SOURCES ${ROCKSDB_SOURCE_DIR}/cache/cache.cc ${ROCKSDB_SOURCE_DIR}/cache/cache_entry_roles.cc ${ROCKSDB_SOURCE_DIR}/cache/cache_key.cc + ${ROCKSDB_SOURCE_DIR}/cache/cache_helpers.cc ${ROCKSDB_SOURCE_DIR}/cache/cache_reservation_manager.cc + ${ROCKSDB_SOURCE_DIR}/cache/charged_cache.cc ${ROCKSDB_SOURCE_DIR}/cache/clock_cache.cc + ${ROCKSDB_SOURCE_DIR}/cache/compressed_secondary_cache.cc ${ROCKSDB_SOURCE_DIR}/cache/lru_cache.cc + ${ROCKSDB_SOURCE_DIR}/cache/secondary_cache.cc ${ROCKSDB_SOURCE_DIR}/cache/sharded_cache.cc ${ROCKSDB_SOURCE_DIR}/db/arena_wrapped_db_iter.cc + ${ROCKSDB_SOURCE_DIR}/db/blob/blob_contents.cc ${ROCKSDB_SOURCE_DIR}/db/blob/blob_fetcher.cc ${ROCKSDB_SOURCE_DIR}/db/blob/blob_file_addition.cc ${ROCKSDB_SOURCE_DIR}/db/blob/blob_file_builder.cc @@ -111,6 +105,7 @@ set(SOURCES ${ROCKSDB_SOURCE_DIR}/db/blob/blob_log_format.cc ${ROCKSDB_SOURCE_DIR}/db/blob/blob_log_sequential_reader.cc ${ROCKSDB_SOURCE_DIR}/db/blob/blob_log_writer.cc + ${ROCKSDB_SOURCE_DIR}/db/blob/blob_source.cc ${ROCKSDB_SOURCE_DIR}/db/blob/prefetch_buffer_collection.cc ${ROCKSDB_SOURCE_DIR}/db/builder.cc ${ROCKSDB_SOURCE_DIR}/db/c.cc @@ -122,7 +117,11 @@ set(SOURCES ${ROCKSDB_SOURCE_DIR}/db/compaction/compaction_picker_fifo.cc ${ROCKSDB_SOURCE_DIR}/db/compaction/compaction_picker_level.cc ${ROCKSDB_SOURCE_DIR}/db/compaction/compaction_picker_universal.cc + ${ROCKSDB_SOURCE_DIR}/db/compaction/compaction_service_job.cc + ${ROCKSDB_SOURCE_DIR}/db/compaction/compaction_state.cc + ${ROCKSDB_SOURCE_DIR}/db/compaction/compaction_outputs.cc ${ROCKSDB_SOURCE_DIR}/db/compaction/sst_partitioner.cc + ${ROCKSDB_SOURCE_DIR}/db/compaction/subcompaction_state.cc ${ROCKSDB_SOURCE_DIR}/db/convenience.cc ${ROCKSDB_SOURCE_DIR}/db/db_filesnapshot.cc ${ROCKSDB_SOURCE_DIR}/db/db_impl/compacted_db_impl.cc @@ -157,10 +156,11 @@ set(SOURCES ${ROCKSDB_SOURCE_DIR}/db/merge_helper.cc ${ROCKSDB_SOURCE_DIR}/db/merge_operator.cc ${ROCKSDB_SOURCE_DIR}/db/output_validator.cc - ${ROCKSDB_SOURCE_DIR}/db/periodic_work_scheduler.cc + ${ROCKSDB_SOURCE_DIR}/db/periodic_task_scheduler.cc ${ROCKSDB_SOURCE_DIR}/db/range_del_aggregator.cc ${ROCKSDB_SOURCE_DIR}/db/range_tombstone_fragmenter.cc ${ROCKSDB_SOURCE_DIR}/db/repair.cc + ${ROCKSDB_SOURCE_DIR}/db/seqno_to_time_mapping.cc 
${ROCKSDB_SOURCE_DIR}/db/snapshot_impl.cc ${ROCKSDB_SOURCE_DIR}/db/table_cache.cc ${ROCKSDB_SOURCE_DIR}/db/table_properties_collector.cc @@ -172,6 +172,8 @@ set(SOURCES ${ROCKSDB_SOURCE_DIR}/db/version_set.cc ${ROCKSDB_SOURCE_DIR}/db/wal_edit.cc ${ROCKSDB_SOURCE_DIR}/db/wal_manager.cc + ${ROCKSDB_SOURCE_DIR}/db/wide/wide_column_serialization.cc + ${ROCKSDB_SOURCE_DIR}/db/wide/wide_columns.cc ${ROCKSDB_SOURCE_DIR}/db/write_batch.cc ${ROCKSDB_SOURCE_DIR}/db/write_batch_base.cc ${ROCKSDB_SOURCE_DIR}/db/write_controller.cc @@ -230,16 +232,17 @@ set(SOURCES ${ROCKSDB_SOURCE_DIR}/options/options.cc ${ROCKSDB_SOURCE_DIR}/options/options_helper.cc ${ROCKSDB_SOURCE_DIR}/options/options_parser.cc + ${ROCKSDB_SOURCE_DIR}/port/mmap.cc ${ROCKSDB_SOURCE_DIR}/port/stack_trace.cc ${ROCKSDB_SOURCE_DIR}/table/adaptive/adaptive_table_factory.cc ${ROCKSDB_SOURCE_DIR}/table/block_based/binary_search_index_reader.cc ${ROCKSDB_SOURCE_DIR}/table/block_based/block.cc - ${ROCKSDB_SOURCE_DIR}/table/block_based/block_based_filter_block.cc ${ROCKSDB_SOURCE_DIR}/table/block_based/block_based_table_builder.cc ${ROCKSDB_SOURCE_DIR}/table/block_based/block_based_table_factory.cc ${ROCKSDB_SOURCE_DIR}/table/block_based/block_based_table_iterator.cc ${ROCKSDB_SOURCE_DIR}/table/block_based/block_based_table_reader.cc ${ROCKSDB_SOURCE_DIR}/table/block_based/block_builder.cc + ${ROCKSDB_SOURCE_DIR}/table/block_based/block_cache.cc ${ROCKSDB_SOURCE_DIR}/table/block_based/block_prefetcher.cc ${ROCKSDB_SOURCE_DIR}/table/block_based/block_prefix_index.cc ${ROCKSDB_SOURCE_DIR}/table/block_based/data_block_hash_index.cc @@ -297,9 +300,12 @@ set(SOURCES ${ROCKSDB_SOURCE_DIR}/trace_replay/trace_record_result.cc ${ROCKSDB_SOURCE_DIR}/trace_replay/trace_record.cc ${ROCKSDB_SOURCE_DIR}/trace_replay/trace_replay.cc + ${ROCKSDB_SOURCE_DIR}/util/async_file_reader.cc + ${ROCKSDB_SOURCE_DIR}/util/cleanable.cc ${ROCKSDB_SOURCE_DIR}/util/coding.cc ${ROCKSDB_SOURCE_DIR}/util/compaction_job_stats_impl.cc ${ROCKSDB_SOURCE_DIR}/util/comparator.cc + ${ROCKSDB_SOURCE_DIR}/util/compression.cc ${ROCKSDB_SOURCE_DIR}/util/compression_context_cache.cc ${ROCKSDB_SOURCE_DIR}/util/concurrent_task_limiter_impl.cc ${ROCKSDB_SOURCE_DIR}/util/crc32c.cc @@ -312,11 +318,13 @@ set(SOURCES ${ROCKSDB_SOURCE_DIR}/util/slice.cc ${ROCKSDB_SOURCE_DIR}/util/file_checksum_helper.cc ${ROCKSDB_SOURCE_DIR}/util/status.cc + ${ROCKSDB_SOURCE_DIR}/util/stderr_logger.cc ${ROCKSDB_SOURCE_DIR}/util/string_util.cc ${ROCKSDB_SOURCE_DIR}/util/thread_local.cc ${ROCKSDB_SOURCE_DIR}/util/threadpool_imp.cc ${ROCKSDB_SOURCE_DIR}/util/xxhash.cc - ${ROCKSDB_SOURCE_DIR}/utilities/backupable/backupable_db.cc + ${ROCKSDB_SOURCE_DIR}/utilities/agg_merge/agg_merge.cc + ${ROCKSDB_SOURCE_DIR}/utilities/backup/backup_engine.cc ${ROCKSDB_SOURCE_DIR}/utilities/blob_db/blob_compaction_filter.cc ${ROCKSDB_SOURCE_DIR}/utilities/blob_db/blob_db.cc ${ROCKSDB_SOURCE_DIR}/utilities/blob_db/blob_db_impl.cc @@ -419,15 +427,6 @@ list(APPEND SOURCES "${ROCKSDB_SOURCE_DIR}/env/fs_posix.cc" "${ROCKSDB_SOURCE_DIR}/env/io_posix.cc") -if(WITH_FOLLY_DISTRIBUTED_MUTEX) - list(APPEND SOURCES - "${ROCKSDB_SOURCE_DIR}/third-party/folly/folly/detail/Futex.cpp" - "${ROCKSDB_SOURCE_DIR}/third-party/folly/folly/synchronization/AtomicNotification.cpp" - "${ROCKSDB_SOURCE_DIR}/third-party/folly/folly/synchronization/DistributedMutex.cpp" - "${ROCKSDB_SOURCE_DIR}/third-party/folly/folly/synchronization/ParkingLot.cpp" - "${ROCKSDB_SOURCE_DIR}/third-party/folly/folly/synchronization/WaitOptions.cpp") -endif() - 
add_library(_rocksdb ${SOURCES}) add_library(ch_contrib::rocksdb ALIAS _rocksdb) target_link_libraries(_rocksdb PRIVATE ${THIRDPARTY_LIBS} ${SYSTEM_LIBS}) diff --git a/contrib/rocksdb-cmake/build_version.cc b/contrib/rocksdb-cmake/build_version.cc index f9639da516f..d5ea56673e0 100644 --- a/contrib/rocksdb-cmake/build_version.cc +++ b/contrib/rocksdb-cmake/build_version.cc @@ -1,16 +1,33 @@ // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -/// This file was edited for ClickHouse. #include #include "rocksdb/version.h" +#include "rocksdb/utilities/object_registry.h" #include "util/string_util.h" // The build script may replace these values with real values based // on whether or not GIT is available and the platform settings -static const std::string rocksdb_build_git_sha = "rocksdb_build_git_sha:0"; -static const std::string rocksdb_build_git_tag = "rocksdb_build_git_tag:master"; -static const std::string rocksdb_build_date = "rocksdb_build_date:2000-01-01"; +static const std::string rocksdb_build_git_sha = "rocksdb_build_git_sha:72438a678872544809393b831c7273794c074215"; +static const std::string rocksdb_build_git_tag = "rocksdb_build_git_tag:main"; +#define HAS_GIT_CHANGES 0 +#if HAS_GIT_CHANGES == 0 +// If HAS_GIT_CHANGES is 0, the GIT date is used. +// Use the time the branch/tag was last modified +static const std::string rocksdb_build_date = "rocksdb_build_date:2024-07-12 16:01:57"; +#else +// If HAS_GIT_CHANGES is > 0, the branch/tag has modifications. +// Use the time the build was created. +static const std::string rocksdb_build_date = "rocksdb_build_date:2024-07-13 17:15:50"; +#endif + +extern "C" { + +} // extern "C" + +std::unordered_map ROCKSDB_NAMESPACE::ObjectRegistry::builtins_ = { + +}; namespace ROCKSDB_NAMESPACE { static void AddProperty(std::unordered_map *props, const std::string& name) { @@ -39,12 +56,12 @@ const std::unordered_map& GetRocksBuildProperties() { } std::string GetRocksVersionAsString(bool with_patch) { - std::string version = ToString(ROCKSDB_MAJOR) + "." + ToString(ROCKSDB_MINOR); + std::string version = std::to_string(ROCKSDB_MAJOR) + "." + std::to_string(ROCKSDB_MINOR); if (with_patch) { - return version + "." + ToString(ROCKSDB_PATCH); + return version + "." 
+ std::to_string(ROCKSDB_PATCH); } else { return version; - } + } } std::string GetRocksBuildInfoAsString(const std::string& program, bool verbose) { From 68aebce89f30eb7766c420d25e852e21e27dfe7d Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Mon, 15 Jul 2024 14:41:37 +0800 Subject: [PATCH 043/661] fix failed uts --- src/Functions/printf.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/Functions/printf.cpp b/src/Functions/printf.cpp index a890b886338..3efe854a53b 100644 --- a/src/Functions/printf.cpp +++ b/src/Functions/printf.cpp @@ -93,6 +93,8 @@ private: { T a = data[i]; s = fmt::sprintf(format, static_cast>(a)); + + res_chars.resize(curr_offset + s.size() + 1); memcpy(&res_chars[curr_offset], s.data(), s.size()); res_chars[curr_offset + s.size()] = 0; @@ -115,6 +117,8 @@ private: { auto a = concrete_column->getDataAt(i).toView(); s = fmt::sprintf(format, a); + + res_chars.resize(curr_offset + s.size() + 1); memcpy(&res_chars[curr_offset], s.data(), s.size()); res_chars[curr_offset + s.size()] = 0; From 2132ce52e0f72afe90e72e756d5ef494ad081ea9 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Mon, 15 Jul 2024 06:48:39 +0000 Subject: [PATCH 044/661] Bump ICU to 75 --- contrib/icu | 2 +- contrib/icu-cmake/CMakeLists.txt | 6 +++--- contrib/icudata | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/contrib/icu b/contrib/icu index c205e7ee49a..7750081bda4 160000 --- a/contrib/icu +++ b/contrib/icu @@ -1 +1 @@ -Subproject commit c205e7ee49a7086a28b9c275fcfdac9ca3dc815d +Subproject commit 7750081bda4b3bc1768ae03849ec70f67ea10625 diff --git a/contrib/icu-cmake/CMakeLists.txt b/contrib/icu-cmake/CMakeLists.txt index f23b0002b8d..f9d05f7fe97 100644 --- a/contrib/icu-cmake/CMakeLists.txt +++ b/contrib/icu-cmake/CMakeLists.txt @@ -4,7 +4,7 @@ else () option(ENABLE_ICU "Enable ICU" 0) endif () -# Temporarily disabled s390x because the ICU build links a blob (icudt71b_dat.S) and our friends from IBM did not explain how they re-generated +# Temporarily disabled s390x because the ICU build links a blob (icudt71b_dat.S) and our friends from IBM did not explain how they generated # the blob on s390x: https://github.com/ClickHouse/icudata/pull/2#issuecomment-2226957255 if (NOT ENABLE_ICU OR ARCH_S390X) message(STATUS "Not using ICU") @@ -462,9 +462,9 @@ file(GENERATE OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/empty.cpp" CONTENT " ") enable_language(ASM) if (ARCH_S390X) - set(ICUDATA_SOURCE_FILE "${ICUDATA_SOURCE_DIR}/icudt71b_dat.S" ) + set(ICUDATA_SOURCE_FILE "${ICUDATA_SOURCE_DIR}/icudt75b_dat.S" ) else() - set(ICUDATA_SOURCE_FILE "${ICUDATA_SOURCE_DIR}/icudt71l_dat.S" ) + set(ICUDATA_SOURCE_FILE "${ICUDATA_SOURCE_DIR}/icudt75l_dat.S" ) endif() set(ICUDATA_SOURCES diff --git a/contrib/icudata b/contrib/icudata index e7488edd1f1..d345d6ac22f 160000 --- a/contrib/icudata +++ b/contrib/icudata @@ -1 +1 @@ -Subproject commit e7488edd1f141b0664553a985a6fcd0125279527 +Subproject commit d345d6ac22f381c882420de9053d30ae1ff38d75 From 77272c925dc15acc5fdd0260a1c0aab35b1df3c3 Mon Sep 17 00:00:00 2001 From: Xu Jia Date: Mon, 15 Jul 2024 15:10:39 +0800 Subject: [PATCH 045/661] disable insertion and mutation --- docs/en/operations/settings/settings.md | 6 ++ src/Core/ServerSettings.h | 1 + src/Interpreters/InterpreterAlterQuery.cpp | 7 ++ src/Interpreters/InterpreterDeleteQuery.cpp | 4 + src/Interpreters/InterpreterInsertQuery.cpp | 5 ++ .../__init__.py | 0 .../config/cluster.xml | 16 ++++ .../config/reading_node.xml | 3 + .../config/storage_policy.xml | 21 +++++ 
.../config/writing_node.xml | 3 + .../test.py | 84 +++++++++++++++++++ 11 files changed, 150 insertions(+) create mode 100644 tests/integration/test_disable_insertion_and_mutation/__init__.py create mode 100644 tests/integration/test_disable_insertion_and_mutation/config/cluster.xml create mode 100644 tests/integration/test_disable_insertion_and_mutation/config/reading_node.xml create mode 100644 tests/integration/test_disable_insertion_and_mutation/config/storage_policy.xml create mode 100644 tests/integration/test_disable_insertion_and_mutation/config/writing_node.xml create mode 100644 tests/integration/test_disable_insertion_and_mutation/test.py diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index c3f697c3bdc..143ce836beb 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -5608,3 +5608,9 @@ Default value: `10000000`. Minimal size of block to compress in CROSS JOIN. Zero value means - disable this threshold. This block is compressed when any of the two thresholds (by rows or by bytes) are reached. Default value: `1GiB`. + +## disable_insertion_and_mutation + +Disable all insert and mutations (alter table update / alter table delete / alter table drop partition). Set to true, can make this node focus on reading queries. + +Default value: `false`. diff --git a/src/Core/ServerSettings.h b/src/Core/ServerSettings.h index 28b32a6e6a5..cf09874125d 100644 --- a/src/Core/ServerSettings.h +++ b/src/Core/ServerSettings.h @@ -157,6 +157,7 @@ namespace DB M(Bool, prepare_system_log_tables_on_startup, false, "If true, ClickHouse creates all configured `system.*_log` tables before the startup. It can be helpful if some startup scripts depend on these tables.", 0) \ M(Double, gwp_asan_force_sample_probability, 0.0003, "Probability that an allocation from specific places will be sampled by GWP Asan (i.e. PODArray allocations)", 0) \ M(UInt64, config_reload_interval_ms, 2000, "How often clickhouse will reload config and check for new changes", 0) \ + M(Bool, disable_insertion_and_mutation, false, "Disable all insert/alter/delete queries. 
This setting will be enabled if someone needs read-only nodes to prevent insertion and mutation affect reading performance.", 0) /// If you add a setting which can be updated at runtime, please update 'changeable_settings' map in StorageSystemServerSettings.cpp diff --git a/src/Interpreters/InterpreterAlterQuery.cpp b/src/Interpreters/InterpreterAlterQuery.cpp index c70a3397f4e..b9dd59909e6 100644 --- a/src/Interpreters/InterpreterAlterQuery.cpp +++ b/src/Interpreters/InterpreterAlterQuery.cpp @@ -46,6 +46,7 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; extern const int UNKNOWN_TABLE; extern const int UNKNOWN_DATABASE; + extern const int QUERY_IS_PROHIBITED; } @@ -191,6 +192,12 @@ BlockIO InterpreterAlterQuery::executeToTable(const ASTAlterQuery & alter) "to execute ALTERs of different types (replicated and non replicated) in single query"); } + if (mutation_commands.hasNonEmptyMutationCommands() || !partition_commands.empty()) + { + if (getContext()->getServerSettings().disable_insertion_and_mutation) + throw Exception(ErrorCodes::QUERY_IS_PROHIBITED, "Mutations are prohibited"); + } + if (!alter_commands.empty()) { auto alter_lock = table->lockForAlter(getContext()->getSettingsRef().lock_acquire_timeout); diff --git a/src/Interpreters/InterpreterDeleteQuery.cpp b/src/Interpreters/InterpreterDeleteQuery.cpp index 39d5d9e9cef..5f3e3385148 100644 --- a/src/Interpreters/InterpreterDeleteQuery.cpp +++ b/src/Interpreters/InterpreterDeleteQuery.cpp @@ -26,6 +26,7 @@ namespace ErrorCodes extern const int SUPPORT_IS_DISABLED; extern const int BAD_ARGUMENTS; extern const int NOT_IMPLEMENTED; + extern const int QUERY_IS_PROHIBITED; } @@ -50,6 +51,9 @@ BlockIO InterpreterDeleteQuery::execute() if (table->isStaticStorage()) throw Exception(ErrorCodes::TABLE_IS_READ_ONLY, "Table is read-only"); + if (getContext()->getGlobalContext()->getServerSettings().disable_insertion_and_mutation) + throw Exception(ErrorCodes::QUERY_IS_PROHIBITED, "Delete queries are prohibited"); + DatabasePtr database = DatabaseCatalog::instance().getDatabase(table_id.database_name); if (database->shouldReplicateQuery(getContext(), query_ptr)) { diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index f396db70d21..b62a71de884 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -44,6 +44,7 @@ namespace ProfileEvents { extern const Event InsertQueriesWithSubqueries; extern const Event QueriesWithSubqueries; + extern const int QUERY_IS_PROHIBITED; } namespace DB @@ -406,6 +407,10 @@ BlockIO InterpreterInsertQuery::execute() StoragePtr table = getTable(query); checkStorageSupportsTransactionsIfNeeded(table, getContext()); + if (getContext()->getServerSettings().disable_insertion_and_mutation + && query.table_id.database_name != DatabaseCatalog::SYSTEM_DATABASE) + throw Exception(ErrorCodes::QUERY_IS_PROHIBITED, "Insert queries are prohibited"); + StoragePtr inner_table; if (const auto * mv = dynamic_cast(table.get())) inner_table = mv->getTargetTable(); diff --git a/tests/integration/test_disable_insertion_and_mutation/__init__.py b/tests/integration/test_disable_insertion_and_mutation/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_disable_insertion_and_mutation/config/cluster.xml b/tests/integration/test_disable_insertion_and_mutation/config/cluster.xml new file mode 100644 index 00000000000..17782a77679 --- /dev/null +++ 
b/tests/integration/test_disable_insertion_and_mutation/config/cluster.xml @@ -0,0 +1,16 @@ + + + + + + writing_node + 9000 + + + reading_node + 9000 + + + + + \ No newline at end of file diff --git a/tests/integration/test_disable_insertion_and_mutation/config/reading_node.xml b/tests/integration/test_disable_insertion_and_mutation/config/reading_node.xml new file mode 100644 index 00000000000..becabce8a44 --- /dev/null +++ b/tests/integration/test_disable_insertion_and_mutation/config/reading_node.xml @@ -0,0 +1,3 @@ + + true + \ No newline at end of file diff --git a/tests/integration/test_disable_insertion_and_mutation/config/storage_policy.xml b/tests/integration/test_disable_insertion_and_mutation/config/storage_policy.xml new file mode 100644 index 00000000000..cec96cfcc1a --- /dev/null +++ b/tests/integration/test_disable_insertion_and_mutation/config/storage_policy.xml @@ -0,0 +1,21 @@ + + + + + s3_with_keeper + http://minio1:9001/root/data/ + minio + minio123 + + + + + +

+ s3_with_keeper +
+ + + + + \ No newline at end of file diff --git a/tests/integration/test_disable_insertion_and_mutation/config/writing_node.xml b/tests/integration/test_disable_insertion_and_mutation/config/writing_node.xml new file mode 100644 index 00000000000..0737af7afc7 --- /dev/null +++ b/tests/integration/test_disable_insertion_and_mutation/config/writing_node.xml @@ -0,0 +1,3 @@ + + false + \ No newline at end of file diff --git a/tests/integration/test_disable_insertion_and_mutation/test.py b/tests/integration/test_disable_insertion_and_mutation/test.py new file mode 100644 index 00000000000..5234ae9c57c --- /dev/null +++ b/tests/integration/test_disable_insertion_and_mutation/test.py @@ -0,0 +1,84 @@ +import pytest +from helpers.client import QueryRuntimeException +from helpers.cluster import ClickHouseCluster +import time + +cluster = ClickHouseCluster(__file__) + +writing_node = cluster.add_instance( + "writing_node", + main_configs=["config/writing_node.xml", "config/storage_policy.xml", "config/cluster.xml"], + with_zookeeper=True, + with_minio=True, + stay_alive=True, + macros={"shard": 1, "replica": 1}, +) +reading_node = cluster.add_instance( + "reading_node", + main_configs=["config/reading_node.xml", "config/storage_policy.xml", "config/cluster.xml"], + with_zookeeper=True, + with_minio=True, + stay_alive=True, + macros={"shard": 1, "replica": 2}, +) + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + + yield cluster + + finally: + cluster.shutdown() + + +def test_disable_insertion_and_mutation(started_cluster): + writing_node.query("""CREATE TABLE my_table on cluster default (key UInt64, value String) ENGINE=ReplicatedMergeTree('/clickhouse/tables/{shard}/default.my_table', '{replica}') ORDER BY key partition by (key % 5) SETTINGS storage_policy='s3_with_keeper' """) + + assert ( + "QUERY_IS_PROHIBITED" + in reading_node.query_and_get_error("INSERT INTO my_table VALUES (1, 'hello')") + ) + + assert ( + "QUERY_IS_PROHIBITED" + in reading_node.query_and_get_error("INSERT INTO my_table SETTINGS async_insert = 1 VALUES (1, 'hello')") + ) + + assert ( + "QUERY_IS_PROHIBITED" + in reading_node.query_and_get_error("ALTER TABLE my_table delete where 1") + ) + + assert ( + "QUERY_IS_PROHIBITED" + in reading_node.query_and_get_error("ALTER table my_table update key = 1 where 1") + ) + + assert ( + "QUERY_IS_PROHIBITED" + in reading_node.query_and_get_error("ALTER TABLE my_table drop partition 0") + ) + + reading_node.query("SELECT * from my_table"); + writing_node.query("INSERT INTO my_table VALUES (1, 'hello')") + writing_node.query("ALTER TABLE my_table delete where 1") + writing_node.query("ALTER table my_table update value = 'no hello' where 1") + + reading_node.query("ALTER TABLE my_table ADD COLUMN new_column UInt64") + writing_node.query("SELECT new_column from my_table") + reading_node.query("SELECT new_column from my_table") + + reading_node.query("ALter Table my_table MODIFY COLUMN new_column String") + + assert( + "new_column\tString" + in reading_node.query("DESC my_table") + ) + + assert( + "new_column\tString" + in writing_node.query("DESC my_table") + ) From fc29ac7891eddd3a714f5af574c71040f91f451d Mon Sep 17 00:00:00 2001 From: Xu Jia Date: Mon, 15 Jul 2024 17:06:37 +0800 Subject: [PATCH 046/661] add error extern to fix compile error --- src/Interpreters/InterpreterInsertQuery.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index 
b62a71de884..c01b2196ac9 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -56,6 +56,7 @@ namespace ErrorCodes extern const int NO_SUCH_COLUMN_IN_TABLE; extern const int ILLEGAL_COLUMN; extern const int DUPLICATE_COLUMN; + extern const int QUERY_IS_PROHIBITED; } InterpreterInsertQuery::InterpreterInsertQuery( From ccba078da10bed8d42e821f8bcdd47f448d198a0 Mon Sep 17 00:00:00 2001 From: Xu Jia Date: Mon, 15 Jul 2024 09:46:31 +0000 Subject: [PATCH 047/661] change storage policy to default --- .../config/storage_policy.xml | 21 ------------------- .../test.py | 6 +++--- 2 files changed, 3 insertions(+), 24 deletions(-) delete mode 100644 tests/integration/test_disable_insertion_and_mutation/config/storage_policy.xml diff --git a/tests/integration/test_disable_insertion_and_mutation/config/storage_policy.xml b/tests/integration/test_disable_insertion_and_mutation/config/storage_policy.xml deleted file mode 100644 index cec96cfcc1a..00000000000 --- a/tests/integration/test_disable_insertion_and_mutation/config/storage_policy.xml +++ /dev/null @@ -1,21 +0,0 @@ - - - - - s3_with_keeper - http://minio1:9001/root/data/ - minio - minio123 - - - - - -
- s3_with_keeper -
-
-
-
-
-
\ No newline at end of file diff --git a/tests/integration/test_disable_insertion_and_mutation/test.py b/tests/integration/test_disable_insertion_and_mutation/test.py index 5234ae9c57c..f098f130d2b 100644 --- a/tests/integration/test_disable_insertion_and_mutation/test.py +++ b/tests/integration/test_disable_insertion_and_mutation/test.py @@ -7,7 +7,7 @@ cluster = ClickHouseCluster(__file__) writing_node = cluster.add_instance( "writing_node", - main_configs=["config/writing_node.xml", "config/storage_policy.xml", "config/cluster.xml"], + main_configs=["config/writing_node.xml", "config/cluster.xml"], with_zookeeper=True, with_minio=True, stay_alive=True, @@ -15,7 +15,7 @@ writing_node = cluster.add_instance( ) reading_node = cluster.add_instance( "reading_node", - main_configs=["config/reading_node.xml", "config/storage_policy.xml", "config/cluster.xml"], + main_configs=["config/reading_node.xml", "config/cluster.xml"], with_zookeeper=True, with_minio=True, stay_alive=True, @@ -35,7 +35,7 @@ def started_cluster(): def test_disable_insertion_and_mutation(started_cluster): - writing_node.query("""CREATE TABLE my_table on cluster default (key UInt64, value String) ENGINE=ReplicatedMergeTree('/clickhouse/tables/{shard}/default.my_table', '{replica}') ORDER BY key partition by (key % 5) SETTINGS storage_policy='s3_with_keeper' """) + writing_node.query("""CREATE TABLE my_table on cluster default (key UInt64, value String) ENGINE=ReplicatedMergeTree('/clickhouse/tables/{shard}/default.my_table', '{replica}') ORDER BY key partition by (key % 5) """) assert ( "QUERY_IS_PROHIBITED" From b2466466d46ab1828b67693edf520d52ff45bc65 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Mon, 15 Jul 2024 14:28:12 +0200 Subject: [PATCH 048/661] Rename Context::getSettings() --- programs/local/LocalServer.cpp | 2 +- src/Analyzer/QueryTreeBuilder.cpp | 2 +- src/Analyzer/Resolve/QueryAnalyzer.cpp | 2 +- src/Analyzer/Utils.cpp | 2 +- src/Bridge/IBridge.cpp | 2 +- src/Client/ClientBase.cpp | 6 +++--- src/Databases/DatabaseDictionary.cpp | 4 ++-- src/Databases/DatabaseOnDisk.cpp | 4 ++-- src/Databases/MySQL/MaterializedMySQLSyncThread.cpp | 2 +- src/Functions/formatQuery.cpp | 2 +- src/Functions/hasColumnInTable.cpp | 2 +- src/Interpreters/Context.cpp | 2 +- src/Interpreters/Context.h | 5 ++--- src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp | 2 +- src/Interpreters/ExpressionAnalyzer.cpp | 4 ++-- src/Interpreters/InterpreterInsertQuery.cpp | 2 +- src/Interpreters/InterpreterSelectQuery.cpp | 2 +- src/Interpreters/JoinedTables.cpp | 2 +- src/Interpreters/ProcessList.cpp | 2 +- src/Interpreters/interpretSubquery.cpp | 2 +- src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp | 2 +- .../QueryPlan/Optimizations/optimizeReadInOrder.cpp | 2 +- src/Server/MySQLHandler.cpp | 2 +- .../Distributed/DistributedAsyncInsertDirectoryQueue.cpp | 2 +- src/Storages/MergeTree/MergedBlockOutputStream.cpp | 2 +- src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp | 2 +- src/Storages/StorageURL.cpp | 4 ++-- src/Storages/StorageView.cpp | 2 +- src/Storages/getStructureOfRemoteTable.cpp | 2 +- src/TableFunctions/Hive/TableFunctionHive.cpp | 2 +- 30 files changed, 37 insertions(+), 38 deletions(-) diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index 46b543e49e9..a7265ef0de4 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -80,7 +80,7 @@ namespace ErrorCodes void applySettingsOverridesForLocal(ContextMutablePtr context) { - Settings settings = 
context->getSettings(); + Settings settings = context->getSettingsCopy(); settings.allow_introspection_functions = true; settings.storage_file_read_method = LocalFSReadMethod::mmap; diff --git a/src/Analyzer/QueryTreeBuilder.cpp b/src/Analyzer/QueryTreeBuilder.cpp index a62b6e56ac5..0a732a3b3b3 100644 --- a/src/Analyzer/QueryTreeBuilder.cpp +++ b/src/Analyzer/QueryTreeBuilder.cpp @@ -237,7 +237,7 @@ QueryTreeNodePtr QueryTreeBuilder::buildSelectExpression(const ASTPtr & select_q /// Remove global settings limit and offset if (const auto & settings_ref = updated_context->getSettingsRef(); settings_ref.limit || settings_ref.offset) { - Settings settings = updated_context->getSettings(); + Settings settings = updated_context->getSettingsCopy(); limit = settings.limit; offset = settings.offset; settings.limit = 0; diff --git a/src/Analyzer/Resolve/QueryAnalyzer.cpp b/src/Analyzer/Resolve/QueryAnalyzer.cpp index 5f7b06231d9..92618dfe346 100644 --- a/src/Analyzer/Resolve/QueryAnalyzer.cpp +++ b/src/Analyzer/Resolve/QueryAnalyzer.cpp @@ -503,7 +503,7 @@ void QueryAnalyzer::evaluateScalarSubqueryIfNeeded(QueryTreeNodePtr & node, Iden ProfileEvents::increment(ProfileEvents::ScalarSubqueriesCacheMiss); auto subquery_context = Context::createCopy(context); - Settings subquery_settings = context->getSettings(); + Settings subquery_settings = context->getSettingsCopy(); subquery_settings.max_result_rows = 1; subquery_settings.extremes = false; subquery_context->setSettings(subquery_settings); diff --git a/src/Analyzer/Utils.cpp b/src/Analyzer/Utils.cpp index d10bbd9bd23..e5f372b7368 100644 --- a/src/Analyzer/Utils.cpp +++ b/src/Analyzer/Utils.cpp @@ -867,7 +867,7 @@ void updateContextForSubqueryExecution(ContextMutablePtr & mutable_context) * max_rows_in_join, max_bytes_in_join, join_overflow_mode, * which are checked separately (in the Set, Join objects). */ - Settings subquery_settings = mutable_context->getSettings(); + Settings subquery_settings = mutable_context->getSettingsCopy(); subquery_settings.max_result_rows = 0; subquery_settings.max_result_bytes = 0; /// The calculation of extremes does not make sense and is not necessary (if you do it, then the extremes of the subquery can be taken for whole query). 
diff --git a/src/Bridge/IBridge.cpp b/src/Bridge/IBridge.cpp index de48a4f2b84..5682a28f899 100644 --- a/src/Bridge/IBridge.cpp +++ b/src/Bridge/IBridge.cpp @@ -232,7 +232,7 @@ int IBridge::main(const std::vector & /*args*/) auto context = Context::createGlobal(shared_context.get()); context->makeGlobalContext(); - auto settings = context->getSettings(); + auto settings = context->getSettingsCopy(); settings.set("http_max_field_value_size", http_max_field_value_size); context->setSettings(settings); diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 5d472ba99b9..2dc603a307f 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -723,7 +723,7 @@ void ClientBase::initLogsOutputStream() void ClientBase::adjustSettings() { - Settings settings = global_context->getSettings(); + Settings settings = global_context->getSettingsCopy(); /// NOTE: Do not forget to set changed=false to avoid sending it to the server (to avoid breakage read only profiles) @@ -931,7 +931,7 @@ bool ClientBase::isSyncInsertWithData(const ASTInsertQuery & insert_query, const if (!insert_query.data) return false; - auto settings = context->getSettings(); + auto settings = context->getSettingsCopy(); if (insert_query.settings_ast) settings.applyChanges(insert_query.settings_ast->as()->changes); @@ -2696,7 +2696,7 @@ bool ClientBase::processMultiQueryFromFile(const String & file_name) if (!getClientConfiguration().has("log_comment")) { - Settings settings = global_context->getSettings(); + Settings settings = global_context->getSettingsCopy(); /// NOTE: cannot use even weakly_canonical() since it fails for /dev/stdin due to resolving of "pipe:[X]" settings.log_comment = fs::absolute(fs::path(file_name)); global_context->setSettings(settings); diff --git a/src/Databases/DatabaseDictionary.cpp b/src/Databases/DatabaseDictionary.cpp index 52196e75c4a..a9569408814 100644 --- a/src/Databases/DatabaseDictionary.cpp +++ b/src/Databases/DatabaseDictionary.cpp @@ -111,7 +111,7 @@ ASTPtr DatabaseDictionary::getCreateTableQueryImpl(const String & table_name, Co buffer << ") Engine = Dictionary(" << backQuoteIfNeed(table_name) << ")"; } - auto settings = getContext()->getSettingsRef(); + const auto & settings = getContext()->getSettingsRef(); ParserCreateQuery parser; const char * pos = query.data(); std::string error_message; @@ -133,7 +133,7 @@ ASTPtr DatabaseDictionary::getCreateDatabaseQuery() const if (const auto comment_value = getDatabaseComment(); !comment_value.empty()) buffer << " COMMENT " << backQuote(comment_value); } - auto settings = getContext()->getSettingsRef(); + const auto & settings = getContext()->getSettingsRef(); ParserCreateQuery parser; return parseQuery(parser, query.data(), query.data() + query.size(), "", 0, settings.max_parser_depth, settings.max_parser_backtracks); } diff --git a/src/Databases/DatabaseOnDisk.cpp b/src/Databases/DatabaseOnDisk.cpp index 07a250e72c7..261a917c595 100644 --- a/src/Databases/DatabaseOnDisk.cpp +++ b/src/Databases/DatabaseOnDisk.cpp @@ -523,7 +523,7 @@ ASTPtr DatabaseOnDisk::getCreateDatabaseQuery() const { ASTPtr ast; - auto settings = getContext()->getSettingsRef(); + const auto & settings = getContext()->getSettingsRef(); { std::lock_guard lock(mutex); auto database_metadata_path = getContext()->getPath() + "metadata/" + escapeForFileName(database_name) + ".sql"; @@ -722,7 +722,7 @@ ASTPtr DatabaseOnDisk::parseQueryFromMetadata( return nullptr; } - auto settings = local_context->getSettingsRef(); + const auto & settings = 
local_context->getSettingsRef(); ParserCreateQuery parser; const char * pos = query.data(); std::string error_message; diff --git a/src/Databases/MySQL/MaterializedMySQLSyncThread.cpp b/src/Databases/MySQL/MaterializedMySQLSyncThread.cpp index 2c342755337..04b4070d5af 100644 --- a/src/Databases/MySQL/MaterializedMySQLSyncThread.cpp +++ b/src/Databases/MySQL/MaterializedMySQLSyncThread.cpp @@ -89,7 +89,7 @@ static constexpr auto MYSQL_BACKGROUND_THREAD_NAME = "MySQLDBSync"; static ContextMutablePtr createQueryContext(ContextPtr context) { - Settings new_query_settings = context->getSettings(); + Settings new_query_settings = context->getSettingsCopy(); new_query_settings.insert_allow_materialized_columns = true; /// To avoid call AST::format diff --git a/src/Functions/formatQuery.cpp b/src/Functions/formatQuery.cpp index d10b3f9a5b7..f7f7e4b5bcb 100644 --- a/src/Functions/formatQuery.cpp +++ b/src/Functions/formatQuery.cpp @@ -39,7 +39,7 @@ public: FunctionFormatQuery(ContextPtr context, String name_, OutputFormatting output_formatting_, ErrorHandling error_handling_) : name(name_), output_formatting(output_formatting_), error_handling(error_handling_) { - const Settings & settings = context->getSettings(); + const Settings & settings = context->getSettingsRef(); max_query_size = settings.max_query_size; max_parser_depth = settings.max_parser_depth; max_parser_backtracks = settings.max_parser_backtracks; diff --git a/src/Functions/hasColumnInTable.cpp b/src/Functions/hasColumnInTable.cpp index 00714997b4a..cc496270b01 100644 --- a/src/Functions/hasColumnInTable.cpp +++ b/src/Functions/hasColumnInTable.cpp @@ -143,7 +143,7 @@ ColumnPtr FunctionHasColumnInTable::executeImpl(const ColumnsWithTypeAndName & a /* cluster_name= */ "", /* password= */ "" }; - auto cluster = std::make_shared(getContext()->getSettings(), host_names, params); + auto cluster = std::make_shared(getContext()->getSettingsRef(), host_names, params); // FIXME this (probably) needs a non-constant access to query context, // because it might initialized a storage. Ideally, the tables required diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index fc1e87e7b7e..3a88e0ccfe1 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -2267,7 +2267,7 @@ bool Context::displaySecretsInShowAndSelect() const return shared->server_settings.display_secrets_in_show_and_select; } -Settings Context::getSettings() const +Settings Context::getSettingsCopy() const { SharedLockGuard lock(mutex); return *settings; diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index 284cac50769..61095e53a17 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -830,7 +830,8 @@ public: void setMacros(std::unique_ptr && macros); bool displaySecretsInShowAndSelect() const; - Settings getSettings() const; + Settings getSettingsCopy() const; + const Settings & getSettingsRef() const { return *settings; } void setSettings(const Settings & settings_); /// Set settings by name. @@ -955,8 +956,6 @@ public: void makeSessionContext(); void makeGlobalContext(); - const Settings & getSettingsRef() const { return *settings; } - void setProgressCallback(ProgressCallback callback); /// Used in executeQuery() to pass it to the QueryPipeline. 
ProgressCallback getProgressCallback() const; diff --git a/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp b/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp index 4bd1c47d5a0..1ca8c40460c 100644 --- a/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp +++ b/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp @@ -74,7 +74,7 @@ void ExecuteScalarSubqueriesMatcher::visit(ASTPtr & ast, Data & data) static auto getQueryInterpreter(const ASTSubquery & subquery, ExecuteScalarSubqueriesMatcher::Data & data) { auto subquery_context = Context::createCopy(data.getContext()); - Settings subquery_settings = data.getContext()->getSettings(); + Settings subquery_settings = data.getContext()->getSettingsCopy(); subquery_settings.max_result_rows = 1; subquery_settings.extremes = false; subquery_context->setSettings(subquery_settings); diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index 16d0eb71278..1a4c02bdebb 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -171,7 +171,7 @@ ExpressionAnalyzer::ExpressionAnalyzer( PreparedSetsPtr prepared_sets_, bool is_create_parameterized_view_) : WithContext(context_) - , query(query_), settings(getContext()->getSettings()) + , query(query_), settings(getContext()->getSettingsRef()) , subquery_depth(subquery_depth_) , syntax(syntax_analyzer_result_) , is_create_parameterized_view(is_create_parameterized_view_) @@ -983,7 +983,7 @@ static std::shared_ptr tryCreateJoin( algorithm == JoinAlgorithm::PARALLEL_HASH || algorithm == JoinAlgorithm::DEFAULT) { - const auto & settings = context->getSettings(); + const auto & settings = context->getSettingsRef(); if (analyzed_join->allowParallelHashJoin()) return std::make_shared( diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index ef298d4d45a..dffa0cbaa5b 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -469,7 +469,7 @@ BlockIO InterpreterInsertQuery::execute() * to avoid unnecessary squashing. */ - Settings new_settings = getContext()->getSettings(); + Settings new_settings = getContext()->getSettingsCopy(); new_settings.max_threads = std::max(1, settings.max_insert_threads); diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index cd91f9532b9..cb3c478dbb1 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -253,7 +253,7 @@ namespace ContextPtr getSubqueryContext(const ContextPtr & context) { auto subquery_context = Context::createCopy(context); - Settings subquery_settings = context->getSettings(); + Settings subquery_settings = context->getSettingsCopy(); subquery_settings.max_result_rows = 0; subquery_settings.max_result_bytes = 0; /// The calculation of extremes does not make sense and is not necessary (if you do it, then the extremes of the subquery can be taken for whole query). 
diff --git a/src/Interpreters/JoinedTables.cpp b/src/Interpreters/JoinedTables.cpp index 0de2bf9cb1f..c5226107f8d 100644 --- a/src/Interpreters/JoinedTables.cpp +++ b/src/Interpreters/JoinedTables.cpp @@ -308,7 +308,7 @@ std::shared_ptr JoinedTables::makeTableJoin(const ASTSelectQuery & se if (tables_with_columns.size() < 2) return {}; - auto settings = context->getSettingsRef(); + const auto & settings = context->getSettingsRef(); MultiEnum join_algorithm = settings.join_algorithm; bool try_use_direct_join = join_algorithm.isSet(JoinAlgorithm::DIRECT) || join_algorithm.isSet(JoinAlgorithm::DEFAULT); auto table_join = std::make_shared(settings, context->getGlobalTemporaryVolume(), context->getTempDataOnDisk()); diff --git a/src/Interpreters/ProcessList.cpp b/src/Interpreters/ProcessList.cpp index 5b07852d9e3..271e23a7288 100644 --- a/src/Interpreters/ProcessList.cpp +++ b/src/Interpreters/ProcessList.cpp @@ -657,7 +657,7 @@ QueryStatusInfo QueryStatus::getInfo(bool get_thread_list, bool get_profile_even { if (auto ctx = context.lock()) { - res.query_settings = std::make_shared(ctx->getSettings()); + res.query_settings = std::make_shared(ctx->getSettingsRef()); res.current_database = ctx->getCurrentDatabase(); } } diff --git a/src/Interpreters/interpretSubquery.cpp b/src/Interpreters/interpretSubquery.cpp index 340f6d1d805..909875b99a0 100644 --- a/src/Interpreters/interpretSubquery.cpp +++ b/src/Interpreters/interpretSubquery.cpp @@ -62,7 +62,7 @@ std::shared_ptr interpretSubquery( * which are checked separately (in the Set, Join objects). */ auto subquery_context = Context::createCopy(context); - Settings subquery_settings = context->getSettings(); + Settings subquery_settings = context->getSettingsCopy(); subquery_settings.max_result_rows = 0; subquery_settings.max_result_bytes = 0; /// The calculation of `extremes` does not make sense and is not necessary (if you do it, then the `extremes` of the subquery can be taken instead of the whole query). 
diff --git a/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp b/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp index c23d717d52f..e467c358d1d 100644 --- a/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp @@ -406,7 +406,7 @@ bool ValuesBlockInputFormat::parseExpression(IColumn & column, size_t column_idx { const Block & header = getPort().getHeader(); const IDataType & type = *header.getByPosition(column_idx).type; - auto settings = context->getSettingsRef(); + const auto & settings = context->getSettingsRef(); /// Advance the token iterator until the start of the column expression readUntilTheEndOfRowAndReTokenize(column_idx); diff --git a/src/Processors/QueryPlan/Optimizations/optimizeReadInOrder.cpp b/src/Processors/QueryPlan/Optimizations/optimizeReadInOrder.cpp index 28eb4da2e17..415a6a11999 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizeReadInOrder.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizeReadInOrder.cpp @@ -1057,7 +1057,7 @@ size_t tryReuseStorageOrderingForWindowFunctions(QueryPlan::Node * parent_node, } auto context = read_from_merge_tree->getContext(); - const auto & settings = context->getSettings(); + const auto & settings = context->getSettingsRef(); if (!settings.optimize_read_in_window_order || (settings.optimize_read_in_order && settings.query_plan_read_in_order) || context->getSettingsRef().allow_experimental_analyzer) { return 0; diff --git a/src/Server/MySQLHandler.cpp b/src/Server/MySQLHandler.cpp index b6d795b1e69..3deb09bae88 100644 --- a/src/Server/MySQLHandler.cpp +++ b/src/Server/MySQLHandler.cpp @@ -473,7 +473,7 @@ void MySQLHandler::comQuery(ReadBuffer & payload, bool binary_protocol) query_context->setCurrentQueryId(fmt::format("mysql:{}:{}", connection_id, toString(UUIDHelpers::generateV4()))); /// --- Workaround for Bug 56173. Can be removed when the analyzer is on by default. 
- auto settings = query_context->getSettings(); + auto settings = query_context->getSettingsCopy(); settings.prefer_column_name_to_alias = true; query_context->setSettings(settings); diff --git a/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.cpp b/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.cpp index d471c67553d..c287fc817eb 100644 --- a/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.cpp +++ b/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.cpp @@ -283,7 +283,7 @@ ConnectionPoolWithFailoverPtr DistributedAsyncInsertDirectoryQueue::createPool(c auto pools = createPoolsForAddresses(addresses, pool_factory, storage.log); - const auto settings = storage.getContext()->getSettings(); + const auto & settings = storage.getContext()->getSettingsRef(); return std::make_shared(std::move(pools), settings.load_balancing, settings.distributed_replica_error_half_life.totalSeconds(), diff --git a/src/Storages/MergeTree/MergedBlockOutputStream.cpp b/src/Storages/MergeTree/MergedBlockOutputStream.cpp index 38869aebaa5..4ee68580d3f 100644 --- a/src/Storages/MergeTree/MergedBlockOutputStream.cpp +++ b/src/Storages/MergeTree/MergedBlockOutputStream.cpp @@ -34,7 +34,7 @@ MergedBlockOutputStream::MergedBlockOutputStream( , write_settings(write_settings_) { MergeTreeWriterSettings writer_settings( - data_part->storage.getContext()->getSettings(), + data_part->storage.getContext()->getSettingsRef(), write_settings, storage_settings, data_part->index_granularity_info.mark_type.adaptive, diff --git a/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp b/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp index c167ac87317..05cd77dcd40 100644 --- a/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp +++ b/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp @@ -23,7 +23,7 @@ MergedColumnOnlyOutputStream::MergedColumnOnlyOutputStream( const MergeTreeIndexGranularityInfo * index_granularity_info) : IMergedBlockOutputStream(data_part->storage.getSettings(), data_part->getDataPartStoragePtr(), metadata_snapshot_, columns_list_, /*reset_columns=*/ true) { - const auto & global_settings = data_part->storage.getContext()->getSettings(); + const auto & global_settings = data_part->storage.getContext()->getSettingsRef(); MergeTreeWriterSettings writer_settings( global_settings, diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index 9cec8c75ebe..731bd7ec3d3 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -462,7 +462,7 @@ std::pair> StorageURLSource: setCredentials(credentials, request_uri); - const auto settings = context_->getSettings(); + const auto & settings = context_->getSettingsRef(); auto proxy_config = getProxyConfiguration(request_uri.getScheme()); @@ -1324,7 +1324,7 @@ std::optional IStorageURLBase::tryGetLastModificationTime( const Poco::Net::HTTPBasicCredentials & credentials, const ContextPtr & context) { - auto settings = context->getSettingsRef(); + const auto & settings = context->getSettingsRef(); auto uri = Poco::URI(url); diff --git a/src/Storages/StorageView.cpp b/src/Storages/StorageView.cpp index 5f768bce978..929896e3246 100644 --- a/src/Storages/StorageView.cpp +++ b/src/Storages/StorageView.cpp @@ -97,7 +97,7 @@ bool hasJoin(const ASTSelectWithUnionQuery & ast) ContextPtr getViewContext(ContextPtr context, const StorageSnapshotPtr & storage_snapshot) { auto view_context = storage_snapshot->metadata->getSQLSecurityOverriddenContext(context); - Settings view_settings = 
view_context->getSettings(); + Settings view_settings = view_context->getSettingsCopy(); view_settings.max_result_rows = 0; view_settings.max_result_bytes = 0; view_settings.extremes = false; diff --git a/src/Storages/getStructureOfRemoteTable.cpp b/src/Storages/getStructureOfRemoteTable.cpp index 56071abaa95..9d23f132759 100644 --- a/src/Storages/getStructureOfRemoteTable.cpp +++ b/src/Storages/getStructureOfRemoteTable.cpp @@ -65,7 +65,7 @@ ColumnsDescription getStructureOfRemoteTableInShard( /// Ignore limit for result number of rows (that could be set during handling CSE/CTE), /// since this is a service query and should not lead to query failure. { - Settings new_settings = new_context->getSettings(); + Settings new_settings = new_context->getSettingsCopy(); new_settings.max_result_rows = 0; new_settings.max_result_bytes = 0; new_context->setSettings(new_settings); diff --git a/src/TableFunctions/Hive/TableFunctionHive.cpp b/src/TableFunctions/Hive/TableFunctionHive.cpp index 80494dbe5a8..759807d7a4f 100644 --- a/src/TableFunctions/Hive/TableFunctionHive.cpp +++ b/src/TableFunctions/Hive/TableFunctionHive.cpp @@ -93,7 +93,7 @@ StoragePtr TableFunctionHive::executeImpl( ColumnsDescription /*cached_columns_*/, bool /*is_insert_query*/) const { - const Settings & settings = context_->getSettings(); + const Settings & settings = context_->getSettingsRef(); ParserExpression partition_by_parser; ASTPtr partition_by_ast = parseQuery( partition_by_parser, From 22b37d526bacfa281372211a81a3daf1518ca5d6 Mon Sep 17 00:00:00 2001 From: Blargian Date: Mon, 15 Jul 2024 17:00:47 +0200 Subject: [PATCH 049/661] update joingGet and add joinGetOrNull --- .../functions/other-functions.md | 138 +++++++++++++++--- 1 file changed, 121 insertions(+), 17 deletions(-) diff --git a/docs/en/sql-reference/functions/other-functions.md b/docs/en/sql-reference/functions/other-functions.md index 260457b3be1..12d082fe0f3 100644 --- a/docs/en/sql-reference/functions/other-functions.md +++ b/docs/en/sql-reference/functions/other-functions.md @@ -2449,11 +2449,11 @@ As you can see, `runningAccumulate` merges states for each group of rows separat ## joinGet -The function lets you extract data from the table the same way as from a [dictionary](../../sql-reference/dictionaries/index.md). - -Gets the data from [Join](../../engines/table-engines/special/join.md#creating-a-table) tables using the specified join key. +The function lets you extract data from the table the same way as from a [dictionary](../../sql-reference/dictionaries/index.md). Gets the data from [Join](../../engines/table-engines/special/join.md#creating-a-table) tables using the specified join key. +:::note Only supports tables created with the `ENGINE = Join(ANY, LEFT, )` statement. +::: **Syntax** @@ -2463,26 +2463,32 @@ joinGet(join_storage_table_name, `value_column`, join_keys) **Arguments** -- `join_storage_table_name` — an [identifier](../../sql-reference/syntax.md#syntax-identifiers) indicating where the search is performed. The identifier is searched in the default database (see setting `default_database` in the config file). To override the default database, use `USE db_name` or specify the database and the table through the separator `db_name.db_table` as in the example. +- `join_storage_table_name` — an [identifier](../../sql-reference/syntax.md#syntax-identifiers) indicating where the search is performed. - `value_column` — name of the column of the table that contains required data. - `join_keys` — list of keys. 
+:::note +The identifier is searched for in the default database (see setting `default_database` in the config file). To override the default database, use `USE db_name` or specify the database and the table through the separator `db_name.db_table` as in the example. +::: + **Returned value** -Returns a list of values corresponded to list of keys. - -If certain does not exist in source table then `0` or `null` will be returned based on [join_use_nulls](../../operations/settings/settings.md#join_use_nulls) setting. +- Returns a list of values corresponded to the list of keys. +:::note +If a certain key does not exist in source table then `0` or `null` will be returned based on [join_use_nulls](../../operations/settings/settings.md#join_use_nulls) setting during table creation. More info about `join_use_nulls` in [Join operation](../../engines/table-engines/special/join.md). +::: **Example** Input table: ```sql -CREATE DATABASE db_test -CREATE TABLE db_test.id_val(`id` UInt32, `val` UInt32) ENGINE = Join(ANY, LEFT, id) SETTINGS join_use_nulls = 1 -INSERT INTO db_test.id_val VALUES (1,11)(2,12)(4,13) +CREATE DATABASE db_test; +CREATE TABLE db_test.id_val(`id` UInt32, `val` UInt32) ENGINE = Join(ANY, LEFT, id); +INSERT INTO db_test.id_val VALUES (1, 11)(2, 12)(4, 13); +SELECT * FROM db_test.id_val; ``` ```text @@ -2496,18 +2502,116 @@ INSERT INTO db_test.id_val VALUES (1,11)(2,12)(4,13) Query: ```sql -SELECT joinGet(db_test.id_val, 'val', toUInt32(number)) from numbers(4) SETTINGS join_use_nulls = 1 +SELECT number, joinGet(db_test.id_val, 'val', toUInt32(number)) from numbers(4); ``` Result: ```text -┌─joinGet(db_test.id_val, 'val', toUInt32(number))─┐ -│ 0 │ -│ 11 │ -│ 12 │ -│ 0 │ -└──────────────────────────────────────────────────┘ + ┌─number─┬─joinGet('db_test.id_val', 'val', toUInt32(number))─┐ +1. │ 0 │ 0 │ +2. │ 1 │ 11 │ +3. │ 2 │ 12 │ +4. │ 3 │ 0 │ + └────────┴────────────────────────────────────────────────────┘ +``` + +Setting `join_use_nulls` can be used during table creation to change the behaviour of what gets returned if no key exists in the source table. + +```sql +CREATE DATABASE db_test; +CREATE TABLE db_test.id_val_nulls(`id` UInt32, `val` UInt32) ENGINE = Join(ANY, LEFT, id) SETTINGS join_use_nulls=1; +INSERT INTO db_test.id_val_nulls VALUES (1, 11)(2, 12)(4, 13); +SELECT * FROM db_test.id_val_nulls; +``` + +```text +┌─id─┬─val─┐ +│ 4 │ 13 │ +│ 2 │ 12 │ +│ 1 │ 11 │ +└────┴─────┘ +``` + +Query: + +```sql +SELECT number, joinGet(db_test.id_val_nulls, 'val', toUInt32(number)) from numbers(4); +``` + +Result: + +```text + ┌─number─┬─joinGet('db_test.id_val_nulls', 'val', toUInt32(number))─┐ +1. │ 0 │ ᴺᵁᴸᴸ │ +2. │ 1 │ 11 │ +3. │ 2 │ 12 │ +4. │ 3 │ ᴺᵁᴸᴸ │ + └────────┴──────────────────────────────────────────────────────────┘ +``` + +## joinGetOrNull + +Like [joinGet](#joinget) but returns `NULL` when the key is missing instead of returning the default value. + +**Syntax** + +```sql +joinGetOrNull(join_storage_table_name, `value_column`, join_keys) +``` + +**Arguments** + +- `join_storage_table_name` — an [identifier](../../sql-reference/syntax.md#syntax-identifiers) indicating where the search is performed. +- `value_column` — name of the column of the table that contains required data. +- `join_keys` — list of keys. + +:::note +The identifier is searched for in the default database (see setting `default_database` in the config file). 
To override the default database, use `USE db_name` or specify the database and the table through the separator `db_name.db_table` as in the example. +::: + +**Returned value** + +- Returns a list of values corresponded to the list of keys. + +:::note +If a certain key does not exist in source table then `NULL` is returned for that key. +::: + +**Example** + +Input table: + +```sql +CREATE DATABASE db_test; +CREATE TABLE db_test.id_val(`id` UInt32, `val` UInt32) ENGINE = Join(ANY, LEFT, id); +INSERT INTO db_test.id_val VALUES (1, 11)(2, 12)(4, 13); +SELECT * FROM db_test.id_val; +``` + +```text +┌─id─┬─val─┐ +│ 4 │ 13 │ +│ 2 │ 12 │ +│ 1 │ 11 │ +└────┴─────┘ +``` + +Query: + +```sql +SELECT number, joinGetOrNull(db_test.id_val, 'val', toUInt32(number)) from numbers(4); +``` + +Result: + +```text + ┌─number─┬─joinGetOrNull('db_test.id_val', 'val', toUInt32(number))─┐ +1. │ 0 │ ᴺᵁᴸᴸ │ +2. │ 1 │ 11 │ +3. │ 2 │ 12 │ +4. │ 3 │ ᴺᵁᴸᴸ │ + └────────┴──────────────────────────────────────────────────────────┘ ``` ## catboostEvaluate From 908f5899ddfdc701df5e9e6189760431e88b6695 Mon Sep 17 00:00:00 2001 From: Ilya Yatsishin <2159081+qoega@users.noreply.github.com> Date: Mon, 15 Jul 2024 17:28:33 +0200 Subject: [PATCH 050/661] Add settings to replace external engines to Null --- src/Core/Settings.h | 1 + src/Core/SettingsChangesHistory.cpp | 3 +- src/Interpreters/InterpreterCreateQuery.cpp | 40 +++++++++++++++++++++ 3 files changed, 43 insertions(+), 1 deletion(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 52fa28a4481..7bf97896357 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -891,6 +891,7 @@ class IColumn; M(Bool, optimize_distinct_in_order, true, "Enable DISTINCT optimization if some columns in DISTINCT form a prefix of sorting. For example, prefix of sorting key in merge tree or ORDER BY statement", 0) \ M(Bool, keeper_map_strict_mode, false, "Enforce additional checks during operations on KeeperMap. E.g. throw an exception on an insert for already existing key", 0) \ M(UInt64, extract_key_value_pairs_max_pairs_per_row, 1000, "Max number of pairs that can be produced by the `extractKeyValuePairs` function. Used as a safeguard against consuming too much memory.", 0) ALIAS(extract_kvp_max_pairs_per_row) \ + M(Bool, restore_replace_external_engine_to_null, false, "Replace all the External table engines to Null on restore. Useful for testing purposes", 0) \ \ \ /* ###################################### */ \ diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index b9b72209103..a23d9d17da2 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -78,7 +78,8 @@ static std::initializer_listno_empty_args = true; storage.set(storage.engine, engine_ast); } + + void setNullTableEngine(ASTStorage &storage) + { + auto engine_ast = std::make_shared(); + engine_ast->name = "Null"; + engine_ast->no_empty_args = true; + storage.set(storage.engine, engine_ast); + } } void InterpreterCreateQuery::setEngine(ASTCreateQuery & create) const @@ -1000,6 +1008,38 @@ void InterpreterCreateQuery::setEngine(ASTCreateQuery & create) const /// Some part of storage definition (such as PARTITION BY) is specified, but ENGINE is not: just set default one. if (!create.storage->engine) setDefaultTableEngine(*create.storage, getContext()->getSettingsRef().default_table_engine.value); + /// For exrternal tables with restore_replace_external_engine_to_null setting we replace external engines to + /// Null table engine. 
+ else (create.storage->engine == "AzureBlobStorage" || + create.storage->engine == "AzureQueue" || + create.storage->engine == "COSN" || + create.storage->engine == "DeltaLake" || + create.storage->engine == "Dictionary" || + create.storage->engine == "Executable" || + create.storage->engine == "ExecutablePool" || + create.storage->engine == "ExternalDistributed" || + create.storage->engine == "File" || + create.storage->engine == "Hudi" || + create.storage->engine == "Iceberg" || + create.storage->engine == "JDBC" || + create.storage->engine == "Kafka" || + create.storage->engine == "MaterializedPostgreSQL" || + create.storage->engine == "MongoDB" || + create.storage->engine == "MySQL" || + create.storage->engine == "NATS" || + create.storage->engine == "ODBC" || + create.storage->engine == "OSS" || + create.storage->engine == "PostgreSQL" || + create.storage->engine == "RabbitMQ" || + create.storage->engine == "Redis" || + create.storage->engine == "S3" || + create.storage->engine == "S3Queue" || + create.storage->engine == "TinyLog" || + create.storage->engine == "URL") + { + if (getContext()->getSettingsRef().restore_replace_external_engine_to_null) + setNullTableEngine(*create.storage) + } return; } From 7d70968db3527d894bc6c02d51dc70f932f7eacd Mon Sep 17 00:00:00 2001 From: Ilya Yatsishin <2159081+qoega@users.noreply.github.com> Date: Mon, 15 Jul 2024 18:47:04 +0200 Subject: [PATCH 051/661] try fix --- src/Interpreters/InterpreterCreateQuery.cpp | 52 ++++++++++----------- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index f8696caebe7..9eb13a29af7 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -1010,32 +1010,32 @@ void InterpreterCreateQuery::setEngine(ASTCreateQuery & create) const setDefaultTableEngine(*create.storage, getContext()->getSettingsRef().default_table_engine.value); /// For exrternal tables with restore_replace_external_engine_to_null setting we replace external engines to /// Null table engine. 
- else (create.storage->engine == "AzureBlobStorage" || - create.storage->engine == "AzureQueue" || - create.storage->engine == "COSN" || - create.storage->engine == "DeltaLake" || - create.storage->engine == "Dictionary" || - create.storage->engine == "Executable" || - create.storage->engine == "ExecutablePool" || - create.storage->engine == "ExternalDistributed" || - create.storage->engine == "File" || - create.storage->engine == "Hudi" || - create.storage->engine == "Iceberg" || - create.storage->engine == "JDBC" || - create.storage->engine == "Kafka" || - create.storage->engine == "MaterializedPostgreSQL" || - create.storage->engine == "MongoDB" || - create.storage->engine == "MySQL" || - create.storage->engine == "NATS" || - create.storage->engine == "ODBC" || - create.storage->engine == "OSS" || - create.storage->engine == "PostgreSQL" || - create.storage->engine == "RabbitMQ" || - create.storage->engine == "Redis" || - create.storage->engine == "S3" || - create.storage->engine == "S3Queue" || - create.storage->engine == "TinyLog" || - create.storage->engine == "URL") + else (create.storage->engine->name == "AzureBlobStorage" || + create.storage->engine->name == "AzureQueue" || + create.storage->engine->name == "COSN" || + create.storage->engine->name == "DeltaLake" || + create.storage->engine->name == "Dictionary" || + create.storage->engine->name == "Executable" || + create.storage->engine->name == "ExecutablePool" || + create.storage->engine->name == "ExternalDistributed" || + create.storage->engine->name == "File" || + create.storage->engine->name == "Hudi" || + create.storage->engine->name == "Iceberg" || + create.storage->engine->name == "JDBC" || + create.storage->engine->name == "Kafka" || + create.storage->engine->name == "MaterializedPostgreSQL" || + create.storage->engine->name == "MongoDB" || + create.storage->engine->name == "MySQL" || + create.storage->engine->name == "NATS" || + create.storage->engine->name == "ODBC" || + create.storage->engine->name == "OSS" || + create.storage->engine->name == "PostgreSQL" || + create.storage->engine->name == "RabbitMQ" || + create.storage->engine->name == "Redis" || + create.storage->engine->name == "S3" || + create.storage->engine->name == "S3Queue" || + create.storage->engine->name == "TinyLog" || + create.storage->engine->name == "URL") { if (getContext()->getSettingsRef().restore_replace_external_engine_to_null) setNullTableEngine(*create.storage) From fc49b1b75f9b075f28cdc4b7eeb768339bb1ebd5 Mon Sep 17 00:00:00 2001 From: Ilya Yatsishin <2159081+qoega@users.noreply.github.com> Date: Mon, 15 Jul 2024 19:02:21 +0200 Subject: [PATCH 052/661] semicolon --- src/Interpreters/InterpreterCreateQuery.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 9eb13a29af7..94230f0e7d1 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -1038,7 +1038,7 @@ void InterpreterCreateQuery::setEngine(ASTCreateQuery & create) const create.storage->engine->name == "URL") { if (getContext()->getSettingsRef().restore_replace_external_engine_to_null) - setNullTableEngine(*create.storage) + setNullTableEngine(*create.storage); } return; } From 083e4b17db62121d6905c35480c3a462dc26477b Mon Sep 17 00:00:00 2001 From: Xu Jia Date: Tue, 16 Jul 2024 09:34:52 +0800 Subject: [PATCH 053/661] trigger CI From 478616de3d03495cf8c324da9464a9807b51ba41 Mon Sep 17 00:00:00 2001 From: Yatsishin Ilya 
<2159081+qoega@users.noreply.github.com> Date: Tue, 16 Jul 2024 10:54:39 +0000 Subject: [PATCH 054/661] forgot --- src/Interpreters/InterpreterCreateQuery.cpp | 52 ++++++++++----------- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 94230f0e7d1..3b23c6899e9 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -1010,32 +1010,32 @@ void InterpreterCreateQuery::setEngine(ASTCreateQuery & create) const setDefaultTableEngine(*create.storage, getContext()->getSettingsRef().default_table_engine.value); /// For exrternal tables with restore_replace_external_engine_to_null setting we replace external engines to /// Null table engine. - else (create.storage->engine->name == "AzureBlobStorage" || - create.storage->engine->name == "AzureQueue" || - create.storage->engine->name == "COSN" || - create.storage->engine->name == "DeltaLake" || - create.storage->engine->name == "Dictionary" || - create.storage->engine->name == "Executable" || - create.storage->engine->name == "ExecutablePool" || - create.storage->engine->name == "ExternalDistributed" || - create.storage->engine->name == "File" || - create.storage->engine->name == "Hudi" || - create.storage->engine->name == "Iceberg" || - create.storage->engine->name == "JDBC" || - create.storage->engine->name == "Kafka" || - create.storage->engine->name == "MaterializedPostgreSQL" || - create.storage->engine->name == "MongoDB" || - create.storage->engine->name == "MySQL" || - create.storage->engine->name == "NATS" || - create.storage->engine->name == "ODBC" || - create.storage->engine->name == "OSS" || - create.storage->engine->name == "PostgreSQL" || - create.storage->engine->name == "RabbitMQ" || - create.storage->engine->name == "Redis" || - create.storage->engine->name == "S3" || - create.storage->engine->name == "S3Queue" || - create.storage->engine->name == "TinyLog" || - create.storage->engine->name == "URL") + else if (create.storage->engine->name == "AzureBlobStorage" || + create.storage->engine->name == "AzureQueue" || + create.storage->engine->name == "COSN" || + create.storage->engine->name == "DeltaLake" || + create.storage->engine->name == "Dictionary" || + create.storage->engine->name == "Executable" || + create.storage->engine->name == "ExecutablePool" || + create.storage->engine->name == "ExternalDistributed" || + create.storage->engine->name == "File" || + create.storage->engine->name == "Hudi" || + create.storage->engine->name == "Iceberg" || + create.storage->engine->name == "JDBC" || + create.storage->engine->name == "Kafka" || + create.storage->engine->name == "MaterializedPostgreSQL" || + create.storage->engine->name == "MongoDB" || + create.storage->engine->name == "MySQL" || + create.storage->engine->name == "NATS" || + create.storage->engine->name == "ODBC" || + create.storage->engine->name == "OSS" || + create.storage->engine->name == "PostgreSQL" || + create.storage->engine->name == "RabbitMQ" || + create.storage->engine->name == "Redis" || + create.storage->engine->name == "S3" || + create.storage->engine->name == "S3Queue" || + create.storage->engine->name == "TinyLog" || + create.storage->engine->name == "URL") { if (getContext()->getSettingsRef().restore_replace_external_engine_to_null) setNullTableEngine(*create.storage); From 7ea3324776bd4cb8cc886822a9b30d3dfcaff5a2 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 16 Jul 2024 16:31:34 +0000 Subject: 
[PATCH 055/661] Refactor in VirtualColumnUtils --- src/Interpreters/ActionsDAG.cpp | 12 ++++----- src/Interpreters/ActionsDAG.h | 3 +-- .../useDataParallelAggregation.cpp | 8 +++--- src/Processors/QueryPlan/SortingStep.cpp | 10 +++++++ src/Storages/MergeTree/MergeTreeData.cpp | 4 +-- .../MergeTree/MergeTreeDataSelectExecutor.cpp | 4 +-- .../StorageObjectStorageSource.cpp | 15 ++++++++--- .../StorageObjectStorageSource.h | 2 +- src/Storages/StorageFile.cpp | 8 ++++-- src/Storages/StorageURL.cpp | 6 +++-- .../System/StorageSystemDetachedParts.cpp | 4 +-- .../StorageSystemDroppedTablesParts.cpp | 4 +-- .../System/StorageSystemDroppedTablesParts.h | 6 ++--- .../System/StorageSystemPartsBase.cpp | 12 ++++----- src/Storages/System/StorageSystemPartsBase.h | 6 ++--- src/Storages/System/StorageSystemTables.cpp | 2 +- src/Storages/VirtualColumnUtils.cpp | 27 +++++++++++-------- src/Storages/VirtualColumnUtils.h | 13 ++++----- 18 files changed, 87 insertions(+), 59 deletions(-) diff --git a/src/Interpreters/ActionsDAG.cpp b/src/Interpreters/ActionsDAG.cpp index 4f03a9e1602..e001406408f 100644 --- a/src/Interpreters/ActionsDAG.cpp +++ b/src/Interpreters/ActionsDAG.cpp @@ -624,9 +624,9 @@ void ActionsDAG::removeAliasesForFilter(const std::string & filter_name) } } -ActionsDAGPtr ActionsDAG::cloneSubDAG(const NodeRawConstPtrs & outputs, bool remove_aliases) +ActionsDAG ActionsDAG::cloneSubDAG(const NodeRawConstPtrs & outputs, bool remove_aliases) { - auto actions = std::make_unique(); + ActionsDAG actions; std::unordered_map copy_map; struct Frame @@ -661,21 +661,21 @@ ActionsDAGPtr ActionsDAG::cloneSubDAG(const NodeRawConstPtrs & outputs, bool rem if (remove_aliases && frame.node->type == ActionType::ALIAS) copy_node = copy_map[frame.node->children.front()]; else - copy_node = &actions->nodes.emplace_back(*frame.node); + copy_node = &actions.nodes.emplace_back(*frame.node); if (frame.node->type == ActionType::INPUT) - actions->inputs.push_back(copy_node); + actions.inputs.push_back(copy_node); stack.pop(); } } - for (auto & node : actions->nodes) + for (auto & node : actions.nodes) for (auto & child : node.children) child = copy_map[child]; for (const auto * output : outputs) - actions->outputs.push_back(copy_map[output]); + actions.outputs.push_back(copy_map[output]); return actions; } diff --git a/src/Interpreters/ActionsDAG.h b/src/Interpreters/ActionsDAG.h index 05948ccf928..6f5c3d3b0df 100644 --- a/src/Interpreters/ActionsDAG.h +++ b/src/Interpreters/ActionsDAG.h @@ -261,13 +261,12 @@ public: void compileExpressions(size_t min_count_to_compile_expression, const std::unordered_set & lazy_executed_nodes = {}); #endif - static ActionsDAGPtr clone(const ActionsDAGPtr & from) { return clone(from.get()); } static ActionsDAGPtr clone(const ActionsDAG * from); ActionsDAG clone(std::unordered_map & old_to_new_nodes) const; ActionsDAG clone() const; - static ActionsDAGPtr cloneSubDAG(const NodeRawConstPtrs & outputs, bool remove_aliases); + static ActionsDAG cloneSubDAG(const NodeRawConstPtrs & outputs, bool remove_aliases); /// Execute actions for header. Input block must have empty columns. /// Result should be equal to the execution of ExpressionActions built from this DAG. 
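The change to `ActionsDAG::cloneSubDAG` above sets the ownership model the rest of this patch follows: the sub-DAG is built and returned as a plain `ActionsDAG` value instead of an `ActionsDAGPtr`, an absent filter becomes an empty `std::optional<ActionsDAG>` rather than a null pointer, and a present DAG is moved exactly once into the `ExpressionActions` that executes it. A minimal sketch of that calling convention, with placeholder types and function names rather than the real ClickHouse declarations:

```cpp
#include <optional>
#include <utility>

struct ActionsDAG { };                                                   // stand-in for DB::ActionsDAG
struct ExpressionActions { explicit ExpressionActions(ActionsDAG) {} };  // consumes the DAG by value

// Hypothetical builder: "no filter" is an empty optional, not a nullptr.
std::optional<ActionsDAG> buildFilterDag(bool has_predicate)
{
    if (!has_predicate)
        return {};
    return ActionsDAG{};                                     // built and returned by value
}

void applyFilter(bool has_predicate)
{
    if (auto dag = buildFilterDag(has_predicate))
        ExpressionActions actions(std::move(*dag));          // the DAG is consumed exactly once
}
```

Compared with the previous `ActionsDAGPtr` interface, the value/optional form makes the single-consumer ownership explicit, which is why the call sites below switch from passing shared pointers to passing `std::move(*filter_dag)`.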
diff --git a/src/Processors/QueryPlan/Optimizations/useDataParallelAggregation.cpp b/src/Processors/QueryPlan/Optimizations/useDataParallelAggregation.cpp index 7e0260c0040..0eeaec9bde7 100644 --- a/src/Processors/QueryPlan/Optimizations/useDataParallelAggregation.cpp +++ b/src/Processors/QueryPlan/Optimizations/useDataParallelAggregation.cpp @@ -74,11 +74,11 @@ void removeInjectiveFunctionsFromResultsRecursively(const ActionsDAG::Node * nod /// Our objective is to replace injective function nodes in `actions` results with its children /// until only the irreducible subset of nodes remains. Against these set of nodes we will match partition key expression /// to determine if it maps all rows with the same value of group by key to the same partition. -NodeSet removeInjectiveFunctionsFromResultsRecursively(const ActionsDAGPtr & actions) +NodeSet removeInjectiveFunctionsFromResultsRecursively(const ActionsDAG & actions) { NodeSet irreducible; NodeSet visited; - for (const auto & node : actions->getOutputs()) + for (const auto & node : actions.getOutputs()) removeInjectiveFunctionsFromResultsRecursively(node, irreducible, visited); return irreducible; } @@ -158,7 +158,7 @@ bool isPartitionKeySuitsGroupByKey( auto key_nodes = group_by_actions.findInOutpus(aggregating.getParams().keys); auto group_by_key_actions = ActionsDAG::cloneSubDAG(key_nodes, /*remove_aliases=*/ true); - const auto & gb_key_required_columns = group_by_key_actions->getRequiredColumnsNames(); + const auto & gb_key_required_columns = group_by_key_actions.getRequiredColumnsNames(); const auto & partition_actions = reading.getStorageMetadata()->getPartitionKey().expression->getActionsDAG(); @@ -169,7 +169,7 @@ bool isPartitionKeySuitsGroupByKey( const auto irreducibe_nodes = removeInjectiveFunctionsFromResultsRecursively(group_by_key_actions); - const auto matches = matchTrees(group_by_key_actions->getOutputs(), partition_actions); + const auto matches = matchTrees(group_by_key_actions.getOutputs(), partition_actions); return allOutputsDependsOnlyOnAllowedNodes(partition_actions, irreducibe_nodes, matches); } diff --git a/src/Processors/QueryPlan/SortingStep.cpp b/src/Processors/QueryPlan/SortingStep.cpp index 48fad9f5fdb..e8e761e7ab0 100644 --- a/src/Processors/QueryPlan/SortingStep.cpp +++ b/src/Processors/QueryPlan/SortingStep.cpp @@ -447,6 +447,13 @@ void SortingStep::describeActions(FormatSettings & settings) const settings.out << '\n'; } + if (!partition_by_description.empty()) + { + settings.out << prefix << "Partition by description: "; + dumpSortDescription(partition_by_description, settings.out); + settings.out << '\n'; + } + if (limit) settings.out << prefix << "Limit " << limit << '\n'; } @@ -461,6 +468,9 @@ void SortingStep::describeActions(JSONBuilder::JSONMap & map) const else map.add("Sort Description", explainSortDescription(result_description)); + if (!partition_by_description.empty()) + map.add("Partition By Description", explainSortDescription(partition_by_description)); + if (limit) map.add("Limit", limit); } diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 878e0420665..9aa9490198a 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -1154,7 +1154,7 @@ std::optional MergeTreeData::totalRowsByPartitionPredicateImpl( if (!virtual_columns_block.has(input->result_name)) valid = false; - PartitionPruner partition_pruner(metadata_snapshot, filter_dag.get(), local_context, true /* strict */); + PartitionPruner 
partition_pruner(metadata_snapshot, &*filter_dag, local_context, true /* strict */); if (partition_pruner.isUseless() && !valid) return {}; @@ -1162,7 +1162,7 @@ std::optional MergeTreeData::totalRowsByPartitionPredicateImpl( if (valid) { virtual_columns_block = getBlockWithVirtualsForFilter(metadata_snapshot, parts); - VirtualColumnUtils::filterBlockWithDAG(filter_dag, virtual_columns_block, local_context); + VirtualColumnUtils::filterBlockWithDAG(std::move(*filter_dag), virtual_columns_block, local_context); part_values = VirtualColumnUtils::extractSingleValueFromBlock(virtual_columns_block, "_part"); if (part_values.empty()) return 0; diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index a6a40a808e5..a37dbfa554c 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -465,7 +465,7 @@ void MergeTreeDataSelectExecutor::buildKeyConditionFromPartOffset( return; part_offset_condition.emplace(KeyCondition{ - dag.get(), + &*dag, context, sample.getNames(), std::make_shared(ActionsDAG(sample.getColumnsWithTypeAndName()), ExpressionActionsSettings{}), @@ -488,7 +488,7 @@ std::optional> MergeTreeDataSelectExecutor::filterPar return {}; auto virtual_columns_block = data.getBlockWithVirtualsForFilter(metadata_snapshot, parts); - VirtualColumnUtils::filterBlockWithDAG(dag, virtual_columns_block, context); + VirtualColumnUtils::filterBlockWithDAG(std::move(*dag), virtual_columns_block, context); return VirtualColumnUtils::extractSingleValueFromBlock(virtual_columns_block, "_part"); } diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp index c86b56d3f1b..e760098f10f 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp @@ -139,7 +139,10 @@ std::shared_ptr StorageObjectStorageSourc paths.reserve(keys.size()); for (const auto & key : keys) paths.push_back(fs::path(configuration->getNamespace()) / key); - VirtualColumnUtils::filterByPathOrFile(keys, paths, filter_dag, virtual_columns, local_context); + + VirtualColumnUtils::buildSetsForDAG(*filter_dag, local_context); + auto actions = std::make_shared(std::move(*filter_dag)); + VirtualColumnUtils::filterByPathOrFile(keys, paths, actions, virtual_columns); copy_configuration->setPaths(keys); } @@ -506,7 +509,11 @@ StorageObjectStorageSource::GlobIterator::GlobIterator( } recursive = key_with_globs == "/**"; - filter_dag = VirtualColumnUtils::createPathAndFileFilterDAG(predicate, virtual_columns); + if (auto filter_dag = VirtualColumnUtils::createPathAndFileFilterDAG(predicate, virtual_columns)) + { + VirtualColumnUtils::buildSetsForDAG(*filter_dag, getContext()); + filter_expr = std::make_shared(std::move(*filter_dag)); + } } else { @@ -570,14 +577,14 @@ StorageObjectStorage::ObjectInfoPtr StorageObjectStorageSource::GlobIterator::ne ++it; } - if (filter_dag) + if (filter_expr) { std::vector paths; paths.reserve(new_batch.size()); for (const auto & object_info : new_batch) paths.push_back(getUniqueStoragePathIdentifier(*configuration, *object_info, false)); - VirtualColumnUtils::filterByPathOrFile(new_batch, paths, filter_dag, virtual_columns, getContext()); + VirtualColumnUtils::filterByPathOrFile(new_batch, paths, filter_expr, virtual_columns); LOG_TEST(logger, "Filtered files: {} -> {}", paths.size(), new_batch.size()); } diff --git 
a/src/Storages/ObjectStorage/StorageObjectStorageSource.h b/src/Storages/ObjectStorage/StorageObjectStorageSource.h index b8418ddd07c..e466621e1e1 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.h @@ -208,7 +208,7 @@ private: ObjectInfos object_infos; ObjectInfos * read_keys; - ActionsDAGPtr filter_dag; + ExpressionActionsPtr filter_expr; ObjectStorageIteratorPtr object_storage_iterator; bool recursive{false}; std::vector expanded_keys; diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index c6acb358d89..fe6f494db00 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -1130,12 +1130,16 @@ StorageFileSource::FilesIterator::FilesIterator( bool distributed_processing_) : WithContext(context_), files(files_), archive_info(std::move(archive_info_)), distributed_processing(distributed_processing_) { - ActionsDAGPtr filter_dag; + std::optional filter_dag; if (!distributed_processing && !archive_info && !files.empty()) filter_dag = VirtualColumnUtils::createPathAndFileFilterDAG(predicate, virtual_columns); if (filter_dag) - VirtualColumnUtils::filterByPathOrFile(files, files, filter_dag, virtual_columns, context_); + { + VirtualColumnUtils::buildSetsForDAG(*filter_dag, context_); + auto actions = std::make_shared(std::move(*filter_dag)); + VirtualColumnUtils::filterByPathOrFile(files, files, actions, virtual_columns); + } } String StorageFileSource::FilesIterator::next() diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index 9cec8c75ebe..c61bb8ac980 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -198,7 +198,7 @@ public: { uris = parseRemoteDescription(uri_, 0, uri_.size(), ',', max_addresses); - ActionsDAGPtr filter_dag; + std::optional filter_dag; if (!uris.empty()) filter_dag = VirtualColumnUtils::createPathAndFileFilterDAG(predicate, virtual_columns); @@ -209,7 +209,9 @@ public: for (const auto & uri : uris) paths.push_back(Poco::URI(uri).getPath()); - VirtualColumnUtils::filterByPathOrFile(uris, paths, filter_dag, virtual_columns, context); + VirtualColumnUtils::buildSetsForDAG(*filter_dag, context); + auto actions = std::make_shared(std::move(*filter_dag)); + VirtualColumnUtils::filterByPathOrFile(uris, paths, actions, virtual_columns); } } diff --git a/src/Storages/System/StorageSystemDetachedParts.cpp b/src/Storages/System/StorageSystemDetachedParts.cpp index 7e4c1de1c65..0d0ae666c10 100644 --- a/src/Storages/System/StorageSystemDetachedParts.cpp +++ b/src/Storages/System/StorageSystemDetachedParts.cpp @@ -307,7 +307,7 @@ protected: std::shared_ptr storage; std::vector columns_mask; - ActionsDAGPtr filter; + std::optional filter; const size_t max_block_size; const size_t num_streams; }; @@ -359,7 +359,7 @@ void StorageSystemDetachedParts::read( void ReadFromSystemDetachedParts::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) { - auto state = std::make_shared(StoragesInfoStream(nullptr, filter, context)); + auto state = std::make_shared(StoragesInfoStream({}, std::move(filter), context)); Pipe pipe; diff --git a/src/Storages/System/StorageSystemDroppedTablesParts.cpp b/src/Storages/System/StorageSystemDroppedTablesParts.cpp index c17d6402d88..defc4ec2d2a 100644 --- a/src/Storages/System/StorageSystemDroppedTablesParts.cpp +++ b/src/Storages/System/StorageSystemDroppedTablesParts.cpp @@ -11,7 +11,7 @@ namespace DB { 
-StoragesDroppedInfoStream::StoragesDroppedInfoStream(const ActionsDAGPtr & filter, ContextPtr context) +StoragesDroppedInfoStream::StoragesDroppedInfoStream(std::optional filter, ContextPtr context) : StoragesInfoStreamBase(context) { /// Will apply WHERE to subset of columns and then add more columns. @@ -75,7 +75,7 @@ StoragesDroppedInfoStream::StoragesDroppedInfoStream(const ActionsDAGPtr & filte { /// Filter block_to_filter with columns 'database', 'table', 'engine', 'active'. if (filter) - VirtualColumnUtils::filterBlockWithDAG(filter, block_to_filter, context); + VirtualColumnUtils::filterBlockWithDAG(std::move(*filter), block_to_filter, context); rows = block_to_filter.rows(); } diff --git a/src/Storages/System/StorageSystemDroppedTablesParts.h b/src/Storages/System/StorageSystemDroppedTablesParts.h index dff9e41cce3..32468fc31b2 100644 --- a/src/Storages/System/StorageSystemDroppedTablesParts.h +++ b/src/Storages/System/StorageSystemDroppedTablesParts.h @@ -9,7 +9,7 @@ namespace DB class StoragesDroppedInfoStream : public StoragesInfoStreamBase { public: - StoragesDroppedInfoStream(const ActionsDAGPtr & filter, ContextPtr context); + StoragesDroppedInfoStream(std::optional filter, ContextPtr context); protected: bool tryLockTable(StoragesInfo &) override { @@ -30,9 +30,9 @@ public: std::string getName() const override { return "SystemDroppedTablesParts"; } protected: - std::unique_ptr getStoragesInfoStream(const ActionsDAGPtr &, const ActionsDAGPtr & filter, ContextPtr context) override + std::unique_ptr getStoragesInfoStream(std::optional, std::optional filter, ContextPtr context) override { - return std::make_unique(filter, context); + return std::make_unique(std::move(filter), context); } }; diff --git a/src/Storages/System/StorageSystemPartsBase.cpp b/src/Storages/System/StorageSystemPartsBase.cpp index f7d1c1b3eb8..a0c9a5c61bd 100644 --- a/src/Storages/System/StorageSystemPartsBase.cpp +++ b/src/Storages/System/StorageSystemPartsBase.cpp @@ -91,7 +91,7 @@ StoragesInfo::getProjectionParts(MergeTreeData::DataPartStateVector & state, boo return data->getProjectionPartsVectorForInternalUsage({State::Active}, &state); } -StoragesInfoStream::StoragesInfoStream(const ActionsDAGPtr & filter_by_database, const ActionsDAGPtr & filter_by_other_columns, ContextPtr context) +StoragesInfoStream::StoragesInfoStream(std::optional filter_by_database, std::optional filter_by_other_columns, ContextPtr context) : StoragesInfoStreamBase(context) { /// Will apply WHERE to subset of columns and then add more columns. @@ -124,7 +124,7 @@ StoragesInfoStream::StoragesInfoStream(const ActionsDAGPtr & filter_by_database, /// Filter block_to_filter with column 'database'. if (filter_by_database) - VirtualColumnUtils::filterBlockWithDAG(filter_by_database, block_to_filter, context); + VirtualColumnUtils::filterBlockWithDAG(std::move(*filter_by_database), block_to_filter, context); rows = block_to_filter.rows(); /// Block contains new columns, update database_column. @@ -204,7 +204,7 @@ StoragesInfoStream::StoragesInfoStream(const ActionsDAGPtr & filter_by_database, { /// Filter block_to_filter with columns 'database', 'table', 'engine', 'active'. 
if (filter_by_other_columns) - VirtualColumnUtils::filterBlockWithDAG(filter_by_other_columns, block_to_filter, context); + VirtualColumnUtils::filterBlockWithDAG(std::move(*filter_by_other_columns), block_to_filter, context); rows = block_to_filter.rows(); } @@ -236,8 +236,8 @@ protected: std::shared_ptr storage; std::vector columns_mask; const bool has_state_column; - ActionsDAGPtr filter_by_database; - ActionsDAGPtr filter_by_other_columns; + std::optional filter_by_database; + std::optional filter_by_other_columns; }; ReadFromSystemPartsBase::ReadFromSystemPartsBase( @@ -318,7 +318,7 @@ void StorageSystemPartsBase::read( void ReadFromSystemPartsBase::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) { - auto stream = storage->getStoragesInfoStream(filter_by_database, filter_by_other_columns, context); + auto stream = storage->getStoragesInfoStream(std::move(filter_by_database), std::move(filter_by_other_columns), context); auto header = getOutputStream().header; MutableColumns res_columns = header.cloneEmptyColumns(); diff --git a/src/Storages/System/StorageSystemPartsBase.h b/src/Storages/System/StorageSystemPartsBase.h index 8671fd850f8..806af4a7bf8 100644 --- a/src/Storages/System/StorageSystemPartsBase.h +++ b/src/Storages/System/StorageSystemPartsBase.h @@ -116,7 +116,7 @@ protected: class StoragesInfoStream : public StoragesInfoStreamBase { public: - StoragesInfoStream(const ActionsDAGPtr & filter_by_database, const ActionsDAGPtr & filter_by_other_columns, ContextPtr context); + StoragesInfoStream(std::optional filter_by_database, std::optional filter_by_other_columns, ContextPtr context); }; /** Implements system table 'parts' which allows to get information about data parts for tables of MergeTree family. 
@@ -146,9 +146,9 @@ protected: StorageSystemPartsBase(const StorageID & table_id_, ColumnsDescription && columns); - virtual std::unique_ptr getStoragesInfoStream(const ActionsDAGPtr & filter_by_database, const ActionsDAGPtr & filter_by_other_columns, ContextPtr context) + virtual std::unique_ptr getStoragesInfoStream(std::optional filter_by_database, std::optional filter_by_other_columns, ContextPtr context) { - return std::make_unique(filter_by_database, filter_by_other_columns, context); + return std::make_unique(std::move(filter_by_database), std::move(filter_by_other_columns), context); } virtual void diff --git a/src/Storages/System/StorageSystemTables.cpp b/src/Storages/System/StorageSystemTables.cpp index 783b899c978..85aaf4ad186 100644 --- a/src/Storages/System/StorageSystemTables.cpp +++ b/src/Storages/System/StorageSystemTables.cpp @@ -169,7 +169,7 @@ ColumnPtr getFilteredTables(const ActionsDAG::Node * predicate, const ColumnPtr block.insert(ColumnWithTypeAndName(std::move(engine_column), std::make_shared(), "engine")); if (dag) - VirtualColumnUtils::filterBlockWithDAG(dag, block, context); + VirtualColumnUtils::filterBlockWithDAG(std::move(*dag), block, context); return block.getByPosition(0).column; } diff --git a/src/Storages/VirtualColumnUtils.cpp b/src/Storages/VirtualColumnUtils.cpp index 1630d9fd9c4..32c6a558340 100644 --- a/src/Storages/VirtualColumnUtils.cpp +++ b/src/Storages/VirtualColumnUtils.cpp @@ -77,15 +77,20 @@ void buildSetsForDAG(const ActionsDAG & dag, const ContextPtr & context) } } -void filterBlockWithDAG(const ActionsDAGPtr & dag, Block & block, ContextPtr context) +void filterBlockWithDAG(ActionsDAG dag, Block & block, ContextPtr context) +{ + buildSetsForDAG(dag, context); + auto actions = std::make_shared(std::move(dag)); + filterBlockWithExpression(actions, block); +} + +void filterBlockWithExpression(const ExpressionActionsPtr & actions, Block & block) { - buildSetsForDAG(*dag, context); - auto actions = std::make_shared(std::move(*ActionsDAG::clone(dag))); Block block_with_filter = block; actions->execute(block_with_filter, /*dry_run=*/ false, /*allow_duplicates_in_input=*/ true); /// Filter the block. 
- String filter_column_name = dag->getOutputs().at(0)->result_name; + String filter_column_name = actions->getActionsDAG().getOutputs().at(0)->result_name; ColumnPtr filter_column = block_with_filter.getByName(filter_column_name).column->convertToFullColumnIfConst(); ConstantFilterDescription constant_filter(*filter_column); @@ -155,7 +160,7 @@ static void addPathAndFileToVirtualColumns(Block & block, const String & path, s block.getByName("_idx").column->assumeMutableRef().insert(idx); } -ActionsDAGPtr createPathAndFileFilterDAG(const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns) +std::optional createPathAndFileFilterDAG(const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns) { if (!predicate || virtual_columns.empty()) return {}; @@ -171,7 +176,7 @@ ActionsDAGPtr createPathAndFileFilterDAG(const ActionsDAG::Node * predicate, con return splitFilterDagForAllowedInputs(predicate, &block); } -ColumnPtr getFilterByPathAndFileIndexes(const std::vector & paths, const ActionsDAGPtr & dag, const NamesAndTypesList & virtual_columns, const ContextPtr & context) +ColumnPtr getFilterByPathAndFileIndexes(const std::vector & paths, const ExpressionActionsPtr & actions, const NamesAndTypesList & virtual_columns) { Block block; for (const auto & column : virtual_columns) @@ -184,7 +189,7 @@ ColumnPtr getFilterByPathAndFileIndexes(const std::vector & paths, const for (size_t i = 0; i != paths.size(); ++i) addPathAndFileToVirtualColumns(block, paths[i], i); - filterBlockWithDAG(dag, block, context); + filterBlockWithExpression(actions, block); return block.getByName("_idx").column; } @@ -355,15 +360,15 @@ static const ActionsDAG::Node * splitFilterNodeForAllowedInputs( return node; } -ActionsDAGPtr splitFilterDagForAllowedInputs(const ActionsDAG::Node * predicate, const Block * allowed_inputs) +std::optional splitFilterDagForAllowedInputs(const ActionsDAG::Node * predicate, const Block * allowed_inputs) { if (!predicate) - return nullptr; + return {}; ActionsDAG::Nodes additional_nodes; const auto * res = splitFilterNodeForAllowedInputs(predicate, allowed_inputs, additional_nodes); if (!res) - return nullptr; + return {}; return ActionsDAG::cloneSubDAG({res}, true); } @@ -372,7 +377,7 @@ void filterBlockWithPredicate(const ActionsDAG::Node * predicate, Block & block, { auto dag = splitFilterDagForAllowedInputs(predicate, &block); if (dag) - filterBlockWithDAG(dag, block, context); + filterBlockWithDAG(std::move(*dag), block, context); } } diff --git a/src/Storages/VirtualColumnUtils.h b/src/Storages/VirtualColumnUtils.h index 208aa7a8100..72c45964ff4 100644 --- a/src/Storages/VirtualColumnUtils.h +++ b/src/Storages/VirtualColumnUtils.h @@ -23,7 +23,8 @@ namespace VirtualColumnUtils void filterBlockWithPredicate(const ActionsDAG::Node * predicate, Block & block, ContextPtr context); /// Just filters block. Block should contain all the required columns. -void filterBlockWithDAG(const ActionsDAGPtr & dag, Block & block, ContextPtr context); +void filterBlockWithDAG(ActionsDAG dag, Block & block, ContextPtr context); +void filterBlockWithExpression(const ExpressionActionsPtr & actions, Block & block); /// Builds sets used by ActionsDAG inplace. 
void buildSetsForDAG(const ActionsDAG & dag, const ContextPtr & context); @@ -32,7 +33,7 @@ void buildSetsForDAG(const ActionsDAG & dag, const ContextPtr & context); bool isDeterministicInScopeOfQuery(const ActionsDAG::Node * node); /// Extract a part of predicate that can be evaluated using only columns from input_names. -ActionsDAGPtr splitFilterDagForAllowedInputs(const ActionsDAG::Node * predicate, const Block * allowed_inputs); +std::optional splitFilterDagForAllowedInputs(const ActionsDAG::Node * predicate, const Block * allowed_inputs); /// Extract from the input stream a set of `name` column values template @@ -49,14 +50,14 @@ auto extractSingleValueFromBlock(const Block & block, const String & name) NameSet getVirtualNamesForFileLikeStorage(); VirtualColumnsDescription getVirtualsForFileLikeStorage(const ColumnsDescription & storage_columns); -ActionsDAGPtr createPathAndFileFilterDAG(const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns); +std::optional createPathAndFileFilterDAG(const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns); -ColumnPtr getFilterByPathAndFileIndexes(const std::vector & paths, const ActionsDAGPtr & dag, const NamesAndTypesList & virtual_columns, const ContextPtr & context); +ColumnPtr getFilterByPathAndFileIndexes(const std::vector & paths, const ExpressionActionsPtr & actions, const NamesAndTypesList & virtual_columns); template -void filterByPathOrFile(std::vector & sources, const std::vector & paths, const ActionsDAGPtr & dag, const NamesAndTypesList & virtual_columns, const ContextPtr & context) +void filterByPathOrFile(std::vector & sources, const std::vector & paths, const ExpressionActionsPtr & actions, const NamesAndTypesList & virtual_columns) { - auto indexes_column = getFilterByPathAndFileIndexes(paths, dag, virtual_columns, context); + auto indexes_column = getFilterByPathAndFileIndexes(paths, actions, virtual_columns); const auto & indexes = typeid_cast(*indexes_column).getData(); if (indexes.size() == sources.size()) return; From 0954eefb076d36ec5804b46e594005cd7f4030bf Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 16 Jul 2024 17:01:35 +0000 Subject: [PATCH 056/661] Revert SortingStep changes. 
--- src/Processors/QueryPlan/SortingStep.cpp | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/src/Processors/QueryPlan/SortingStep.cpp b/src/Processors/QueryPlan/SortingStep.cpp index e8e761e7ab0..48fad9f5fdb 100644 --- a/src/Processors/QueryPlan/SortingStep.cpp +++ b/src/Processors/QueryPlan/SortingStep.cpp @@ -447,13 +447,6 @@ void SortingStep::describeActions(FormatSettings & settings) const settings.out << '\n'; } - if (!partition_by_description.empty()) - { - settings.out << prefix << "Partition by description: "; - dumpSortDescription(partition_by_description, settings.out); - settings.out << '\n'; - } - if (limit) settings.out << prefix << "Limit " << limit << '\n'; } @@ -468,9 +461,6 @@ void SortingStep::describeActions(JSONBuilder::JSONMap & map) const else map.add("Sort Description", explainSortDescription(result_description)); - if (!partition_by_description.empty()) - map.add("Partition By Description", explainSortDescription(partition_by_description)); - if (limit) map.add("Limit", limit); } From b6a790124cd670749b4c504f58a4854307bf7d83 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Tue, 16 Jul 2024 20:16:47 +0000 Subject: [PATCH 057/661] Handling parallel replicas protocol with priority for async communication --- src/Processors/IProcessor.h | 2 + src/Processors/Sources/RemoteSource.cpp | 23 +++++++++++ src/Processors/Sources/RemoteSource.h | 3 ++ src/QueryPipeline/RemoteQueryExecutor.cpp | 38 +++++++++++++++++-- src/QueryPipeline/RemoteQueryExecutor.h | 4 +- .../RemoteQueryExecutorReadContext.h | 2 + 6 files changed, 67 insertions(+), 5 deletions(-) diff --git a/src/Processors/IProcessor.h b/src/Processors/IProcessor.h index 02f7b6b3d12..358983a2179 100644 --- a/src/Processors/IProcessor.h +++ b/src/Processors/IProcessor.h @@ -221,6 +221,8 @@ public: throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method 'schedule' is not implemented for {} processor", getName()); } + virtual void asyncJobReady() {} + /** You must call this method if 'prepare' returned ExpandPipeline. * This method cannot access any port, but it can create new ports for current processor. * diff --git a/src/Processors/Sources/RemoteSource.cpp b/src/Processors/Sources/RemoteSource.cpp index 3d7dd3f76b8..f1d47f69782 100644 --- a/src/Processors/Sources/RemoteSource.cpp +++ b/src/Processors/Sources/RemoteSource.cpp @@ -89,6 +89,12 @@ ISource::Status RemoteSource::prepare() void RemoteSource::work() { + if (async_immediate_work.exchange(false)) + { + LOG_DEBUG(getLogger(__PRETTY_FUNCTION__), "async_immediate_work was true"); + return; + } + /// Connection drain is a heavy operation that may take a long time. /// Therefore we move connection drain from prepare() to work(), and drain multiple connections in parallel. /// See issue: https://github.com/ClickHouse/ClickHouse/issues/60844 @@ -101,6 +107,23 @@ void RemoteSource::work() ISource::work(); } +void RemoteSource::asyncJobReady() +{ + chassert(async_read); + + if (!was_query_sent) + return; + + auto res = query_executor->readAsync(/*probe=*/true); + if (res.type == RemoteQueryExecutor::ReadResult::Type::ParallelReplicasToken) + { + LOG_DEBUG(getLogger(__PRETTY_FUNCTION__), "async_immediate_work is {}", async_immediate_work); + work(); + async_immediate_work = true; + LOG_DEBUG(getLogger(__PRETTY_FUNCTION__), "async_immediate_work is true"); + } +} + std::optional RemoteSource::tryGenerate() { /// onCancel() will do the cancel if the query was sent. 
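The `IProcessor.h` and `RemoteSource.cpp` hunks above carry the core of this change: processors gain a default-empty `asyncJobReady()` hook, and `RemoteSource` overrides it to peek at the pending packet type and, when a parallel-replicas coordination request is waiting, to service it immediately and then skip the next regular `work()` call via the `async_immediate_work` flag. A simplified sketch of that shape — placeholder names and packet types, not the actual pipeline-executor or `RemoteQueryExecutor` wiring:

```cpp
#include <atomic>

// Base class exposes an optional hook; the default is a no-op, mirroring IProcessor::asyncJobReady().
struct Processor
{
    virtual ~Processor() = default;
    virtual void work() = 0;
    virtual void asyncJobReady() {}   // may be called when the processor's async fd becomes ready
};

enum class PacketType { Data, CoordinationRequest };

struct RemoteLikeSource : Processor
{
    // Placeholder for readAsync(check_packet_type_only = true): inspects the packet type without consuming data.
    PacketType peekPendingPacket() const { return pending; }

    void asyncJobReady() override
    {
        if (peekPendingPacket() == PacketType::CoordinationRequest)
        {
            work();                    // answer the coordination request right away
            handled_eagerly = true;    // make the next scheduled work() a no-op
        }
    }

    void work() override
    {
        if (handled_eagerly.exchange(false))
            return;                    // already handled from asyncJobReady()
        // ... normal packet processing / row production would happen here ...
    }

    PacketType pending = PacketType::Data;
    std::atomic_bool handled_eagerly{false};
};
```

The intent signalled by the patch title is that coordination packets (the read-task requests and range announcements checked for in `readAsync`) get handled with priority instead of waiting their turn behind data packets.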
diff --git a/src/Processors/Sources/RemoteSource.h b/src/Processors/Sources/RemoteSource.h index 052567bc261..fa04985f101 100644 --- a/src/Processors/Sources/RemoteSource.h +++ b/src/Processors/Sources/RemoteSource.h @@ -32,6 +32,8 @@ public: int schedule() override { return fd; } + void asyncJobReady() override; + void setStorageLimits(const std::shared_ptr & storage_limits_) override; protected: @@ -52,6 +54,7 @@ private: int fd = -1; size_t rows = 0; bool manually_add_rows_before_limit_counter = false; + std::atomic_bool async_immediate_work{false}; }; /// Totals source from RemoteQueryExecutor. diff --git a/src/QueryPipeline/RemoteQueryExecutor.cpp b/src/QueryPipeline/RemoteQueryExecutor.cpp index b08f2002f64..3ca05b53417 100644 --- a/src/QueryPipeline/RemoteQueryExecutor.cpp +++ b/src/QueryPipeline/RemoteQueryExecutor.cpp @@ -469,7 +469,7 @@ RemoteQueryExecutor::ReadResult RemoteQueryExecutor::read() return restartQueryWithoutDuplicatedUUIDs(); } -RemoteQueryExecutor::ReadResult RemoteQueryExecutor::readAsync() +RemoteQueryExecutor::ReadResult RemoteQueryExecutor::readAsync(bool check_packet_type_only) { #if defined(OS_LINUX) if (!read_context || (resent_query && recreate_read_context)) @@ -486,7 +486,21 @@ RemoteQueryExecutor::ReadResult RemoteQueryExecutor::readAsync() { std::lock_guard lock(was_cancelled_mutex); if (was_cancelled) + { + LOG_DEBUG(getLogger(__PRETTY_FUNCTION__), "was_cancelled"); return ReadResult(Block()); + } + + if (has_postponed_packet) + { + has_postponed_packet = false; + auto read_result = processPacket(read_context->getPacket()); + if (read_result.getType() == ReadResult::Type::Data || read_result.getType() == ReadResult::Type::ParallelReplicasToken) + return read_result; + + if (got_duplicated_part_uuids) + break; + } read_context->resume(); @@ -506,12 +520,28 @@ RemoteQueryExecutor::ReadResult RemoteQueryExecutor::readAsync() /// Check if packet is not ready yet. if (read_context->isInProgress()) + { + LOG_DEBUG(getLogger(__PRETTY_FUNCTION__), "read_context still in progress"); return ReadResult(read_context->getFileDescriptor()); + } - auto anything = processPacket(read_context->getPacket()); + const auto packet_type = read_context->getPacketType(); + LOG_DEBUG(getLogger(__PRETTY_FUNCTION__), "Packet type: {}", packet_type); - if (anything.getType() == ReadResult::Type::Data || anything.getType() == ReadResult::Type::ParallelReplicasToken) - return anything; + if (check_packet_type_only) + { + has_postponed_packet = true; + if (packet_type == Protocol::Server::MergeTreeReadTaskRequest + || packet_type == Protocol::Server::MergeTreeAllRangesAnnouncement) + { + return ReadResult(ReadResult::Type::ParallelReplicasToken); + } + return ReadResult(ReadResult::Type::Nothing); + } + + auto read_result = processPacket(read_context->getPacket()); + if (read_result.getType() == ReadResult::Type::Data || read_result.getType() == ReadResult::Type::ParallelReplicasToken) + return read_result; if (got_duplicated_part_uuids) break; diff --git a/src/QueryPipeline/RemoteQueryExecutor.h b/src/QueryPipeline/RemoteQueryExecutor.h index 04a59cc3b7e..6849c3e0a07 100644 --- a/src/QueryPipeline/RemoteQueryExecutor.h +++ b/src/QueryPipeline/RemoteQueryExecutor.h @@ -183,7 +183,7 @@ public: ReadResult read(); /// Async variant of read. Returns ready block or file descriptor which may be used for polling. - ReadResult readAsync(); + ReadResult readAsync(bool check_packet_type_only = false); /// Receive all remain packets and finish query. 
/// It should be cancelled after read returned empty block. @@ -303,6 +303,8 @@ private: */ bool got_duplicated_part_uuids = false; + bool has_postponed_packet = false; + /// Parts uuids, collected from remote replicas std::vector duplicated_part_uuids; diff --git a/src/QueryPipeline/RemoteQueryExecutorReadContext.h b/src/QueryPipeline/RemoteQueryExecutorReadContext.h index b8aa8bb9111..c054e75f6f1 100644 --- a/src/QueryPipeline/RemoteQueryExecutorReadContext.h +++ b/src/QueryPipeline/RemoteQueryExecutorReadContext.h @@ -39,6 +39,8 @@ public: Packet getPacket() { return std::move(packet); } + UInt64 getPacketType() const { return packet.type; } + private: bool checkTimeout(bool blocking = false); From a7310e51939ad6053d6ab94b07f0171457e5d779 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Mon, 8 Jul 2024 19:32:27 +0200 Subject: [PATCH 058/661] Ignore async_load_databases for ATTACH query It is quite odd that when ATTACH finishes the tables may not be exists, due to async_load_databases. For server startup it makes total sense, but not for queries. Plus, you can execute queries in parallel if you want to make it faster. Note, that server startup does not uses this code, see loadMetadata.cpp. Signed-off-by: Azat Khuzhin --- src/Interpreters/InterpreterCreateQuery.cpp | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 84d7f0a587c..1d8d885b216 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -362,18 +362,10 @@ BlockIO InterpreterCreateQuery::createDatabase(ASTCreateQuery & create) TablesLoader loader{getContext()->getGlobalContext(), {{database_name, database}}, mode}; auto load_tasks = loader.loadTablesAsync(); auto startup_tasks = loader.startupTablesAsync(); - if (getContext()->getGlobalContext()->getServerSettings().async_load_databases) - { - scheduleLoad(load_tasks); - scheduleLoad(startup_tasks); - } - else - { - /// First prioritize, schedule and wait all the load table tasks - waitLoad(currentPoolOr(TablesLoaderForegroundPoolId), load_tasks); - /// Only then prioritize, schedule and wait all the startup tasks - waitLoad(currentPoolOr(TablesLoaderForegroundPoolId), startup_tasks); - } + /// First prioritize, schedule and wait all the load table tasks + waitLoad(currentPoolOr(TablesLoaderForegroundPoolId), load_tasks); + /// Only then prioritize, schedule and wait all the startup tasks + waitLoad(currentPoolOr(TablesLoaderForegroundPoolId), startup_tasks); } } catch (...) From f30d35ae2926948f1e6a268917113e757df4e2df Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Mon, 8 Jul 2024 19:34:18 +0200 Subject: [PATCH 059/661] Revert "Merge pull request #65571 from ClickHouse/fix-flaky-test-4" Reverts: https://github.com/ClickHouse/ClickHouse/pull/65571 This reverts commit da9a34ea46b504881ffe5aa605c933106862ba25, reversing changes made to cbdb9833f207d4b0e35ad09cf4757f5d5b506b77. 
Signed-off-by: Azat Khuzhin --- .../0_stateless/01254_dict_load_after_detach_attach.reference | 2 +- .../queries/0_stateless/01254_dict_load_after_detach_attach.sql | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/01254_dict_load_after_detach_attach.reference b/tests/queries/0_stateless/01254_dict_load_after_detach_attach.reference index 9c2c59f6379..2f2d638a294 100644 --- a/tests/queries/0_stateless/01254_dict_load_after_detach_attach.reference +++ b/tests/queries/0_stateless/01254_dict_load_after_detach_attach.reference @@ -1,4 +1,4 @@ -NOT_LOADED +0 NOT_LOADED 0 LOADED 10 1 LOADED diff --git a/tests/queries/0_stateless/01254_dict_load_after_detach_attach.sql b/tests/queries/0_stateless/01254_dict_load_after_detach_attach.sql index 11473c6ce32..ef9e940df8b 100644 --- a/tests/queries/0_stateless/01254_dict_load_after_detach_attach.sql +++ b/tests/queries/0_stateless/01254_dict_load_after_detach_attach.sql @@ -12,7 +12,7 @@ LAYOUT(FLAT()); DETACH DATABASE {CLICKHOUSE_DATABASE:Identifier}; ATTACH DATABASE {CLICKHOUSE_DATABASE:Identifier}; -SELECT COALESCE((SELECT status FROM system.dictionaries WHERE database = currentDatabase() AND name = 'dict')::Nullable(String), 'NOT_LOADED'); +SELECT query_count, status FROM system.dictionaries WHERE database = currentDatabase() AND name = 'dict'; SYSTEM RELOAD DICTIONARY dict; SELECT query_count, status FROM system.dictionaries WHERE database = currentDatabase() AND name = 'dict'; SELECT dictGetUInt64('dict', 'val', toUInt64(0)); From 523e0abb4ec329c0535602c43c17991f4ef043a3 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Wed, 17 Jul 2024 13:15:14 +0000 Subject: [PATCH 060/661] Remove debug logs --- src/Processors/Sources/RemoteSource.cpp | 7 +------ src/QueryPipeline/RemoteQueryExecutor.cpp | 10 +--------- 2 files changed, 2 insertions(+), 15 deletions(-) diff --git a/src/Processors/Sources/RemoteSource.cpp b/src/Processors/Sources/RemoteSource.cpp index f1d47f69782..e33613564a2 100644 --- a/src/Processors/Sources/RemoteSource.cpp +++ b/src/Processors/Sources/RemoteSource.cpp @@ -90,10 +90,7 @@ ISource::Status RemoteSource::prepare() void RemoteSource::work() { if (async_immediate_work.exchange(false)) - { - LOG_DEBUG(getLogger(__PRETTY_FUNCTION__), "async_immediate_work was true"); return; - } /// Connection drain is a heavy operation that may take a long time. /// Therefore we move connection drain from prepare() to work(), and drain multiple connections in parallel. 
@@ -114,13 +111,11 @@ void RemoteSource::asyncJobReady() if (!was_query_sent) return; - auto res = query_executor->readAsync(/*probe=*/true); + auto res = query_executor->readAsync(/*check_packet_type_only=*/true); if (res.type == RemoteQueryExecutor::ReadResult::Type::ParallelReplicasToken) { - LOG_DEBUG(getLogger(__PRETTY_FUNCTION__), "async_immediate_work is {}", async_immediate_work); work(); async_immediate_work = true; - LOG_DEBUG(getLogger(__PRETTY_FUNCTION__), "async_immediate_work is true"); } } diff --git a/src/QueryPipeline/RemoteQueryExecutor.cpp b/src/QueryPipeline/RemoteQueryExecutor.cpp index 3ca05b53417..87f634b8334 100644 --- a/src/QueryPipeline/RemoteQueryExecutor.cpp +++ b/src/QueryPipeline/RemoteQueryExecutor.cpp @@ -486,10 +486,7 @@ RemoteQueryExecutor::ReadResult RemoteQueryExecutor::readAsync(bool check_packet { std::lock_guard lock(was_cancelled_mutex); if (was_cancelled) - { - LOG_DEBUG(getLogger(__PRETTY_FUNCTION__), "was_cancelled"); return ReadResult(Block()); - } if (has_postponed_packet) { @@ -520,17 +517,12 @@ RemoteQueryExecutor::ReadResult RemoteQueryExecutor::readAsync(bool check_packet /// Check if packet is not ready yet. if (read_context->isInProgress()) - { - LOG_DEBUG(getLogger(__PRETTY_FUNCTION__), "read_context still in progress"); return ReadResult(read_context->getFileDescriptor()); - } - - const auto packet_type = read_context->getPacketType(); - LOG_DEBUG(getLogger(__PRETTY_FUNCTION__), "Packet type: {}", packet_type); if (check_packet_type_only) { has_postponed_packet = true; + const auto packet_type = read_context->getPacketType(); if (packet_type == Protocol::Server::MergeTreeReadTaskRequest || packet_type == Protocol::Server::MergeTreeAllRangesAnnouncement) { From 2dbd04c8a77bf262f0965ddef9a2c166c22fcf55 Mon Sep 17 00:00:00 2001 From: Blargian Date: Wed, 17 Jul 2024 17:01:22 +0200 Subject: [PATCH 061/661] add toIntXYZ documentation --- .../functions/type-conversion-functions.md | 1275 ++++++++++++++++- 1 file changed, 1239 insertions(+), 36 deletions(-) diff --git a/docs/en/sql-reference/functions/type-conversion-functions.md b/docs/en/sql-reference/functions/type-conversion-functions.md index 61e84ca72d1..057083d317f 100644 --- a/docs/en/sql-reference/functions/type-conversion-functions.md +++ b/docs/en/sql-reference/functions/type-conversion-functions.md @@ -49,105 +49,1308 @@ SETTINGS cast_keep_nullable = 1 └──────────────────┴─────────────────────┴──────────────────┘ ``` -## toInt(8\|16\|32\|64\|128\|256) +## toInt8 -Converts an input value to a value the [Int](../data-types/int-uint.md) data type. This function family includes: +Converts an input value to a value of type `Int8`. -- `toInt8(expr)` — Converts to a value of data type `Int8`. -- `toInt16(expr)` — Converts to a value of data type `Int16`. -- `toInt32(expr)` — Converts to a value of data type `Int32`. -- `toInt64(expr)` — Converts to a value of data type `Int64`. -- `toInt128(expr)` — Converts to a value of data type `Int128`. -- `toInt256(expr)` — Converts to a value of data type `Int256`. +**Syntax** + +```sql +toInt8(expr) +``` **Arguments** -- `expr` — [Expression](../syntax.md/#syntax-expressions) returning a number or a string with the decimal representation of a number. Binary, octal, and hexadecimal representations of numbers are not supported. Leading zeroes are stripped. +- `expr` — Expression returning a number or a string with the decimal representation of a number. [Expression](../syntax.md/#syntax-expressions). 
+ +:::note +Binary, octal, and hexadecimal representations of numbers are not supported. Leading zeroes are stripped. +::: **Returned value** -Integer value in the `Int8`, `Int16`, `Int32`, `Int64`, `Int128` or `Int256` data type. +- 8-bit integer value. [Int8](../data-types/int-uint.md). -Functions use [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning they truncate fractional digits of numbers. +:::note +Function uses [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning it truncates fractional digits of numbers. +::: -The behavior of functions for the [NaN and Inf](../data-types/float.md/#data_type-float-nan-inf) arguments is undefined. Remember about [numeric conversions issues](#common-issues-with-data-conversion), when using the functions. +:::danger +An exception is thrown for [NaN and Inf](../data-types/float.md/#data_type-float-nan-inf) arguments. Keep in mind [numeric conversions issues](#common-issues-with-data-conversion), when using this function. +::: + +**Example** + +Query: + +```sql +SELECT + toInt8(-8), + toInt8(-8.8), + toInt8('-8'); +``` + +Result: + +```response + ┌─toInt8(-8)─┬─toInt8(-8.8)─┬─toInt8('-8')─┐ +1. │ -8 │ -8 │ -8 │ + └────────────┴──────────────┴──────────────┘ +``` + +**See also** + +- [`toInt8OrZero`](#toint8orzero). +- [`toInt8OrNull`](#toint8ornull). +- [`toInt8OrDefault`](#toint8ordefault). + +## toInt8OrZero + +Like [`toInt8`](#toint8), it takes an argument of type [String](../data-types/string.md) and tries to parse it to type `Int8`. If unsuccessful, returns `0`. + +**Syntax** + +```sql +toInt8OrZero(expr) +``` + +**Arguments** + +- `expr` — Expression returning a number or a string with the decimal representation of a number. [Expression](../syntax.md/#syntax-expressions) / [String](../data-types/string.md). + +:::note +Binary, octal, and hexadecimal representations of numbers are not supported. Leading zeroes are stripped. +::: + +**Returned value** + +- 8-bit integer value if successful, otherwise `0`. [Int8](../data-types/int-uint.md). + +:::note +Function uses [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning it truncates fractional digits of numbers. +::: + +:::danger +An exception is thrown for [NaN and Inf](../data-types/float.md/#data_type-float-nan-inf) arguments. Keep in mind [numeric conversions issues](#common-issues-with-data-conversion), when using this function. +::: **Example** Query: ``` sql -SELECT toInt64(nan), toInt32(32), toInt16('16'), toInt8(8.8); +SELECT + toInt8OrZero('-8'), + toInt8OrZero('abc'); ``` Result: ```response -┌─────────toInt64(nan)─┬─toInt32(32)─┬─toInt16('16')─┬─toInt8(8.8)─┐ -│ -9223372036854775808 │ 32 │ 16 │ 8 │ -└──────────────────────┴─────────────┴───────────────┴─────────────┘ + ┌─toInt8OrZero('-8')─┬─toInt8OrZero('abc')─┐ +1. │ -8 │ 0 │ + └────────────────────┴─────────────────────┘ ``` -## toInt(8\|16\|32\|64\|128\|256)OrZero +**See also** -Takes an argument of type [String](../data-types/string.md) and tries to parse it into an Int (8 \| 16 \| 32 \| 64 \| 128 \| 256). If unsuccessful, returns `0`. +- [`toInt8`](#toint8). +- [`toInt8OrNull`](#toint8ornull). +- [`toInt8OrDefault`](#toint8ordefault). + +## toInt8OrNull + +Like [`toInt8`](#toint8), takes an argument of type [String](../data-types/string.md) and tries to parse it to type `Int8`. If unsuccessful, returns `NULL`. 
+ +**Syntax** + +```sql +toInt8OrNull(expr) +``` + +**Arguments** + +- `expr` — Expression returning a number or a string with the decimal representation of a number. [Expression](../syntax.md/#syntax-expressions) / [String](../data-types/string.md). + +:::note +Binary, octal, and hexadecimal representations of numbers are not supported. Leading zeroes are stripped. +::: + +**Returned value** + +- 8-bit integer value if successful, otherwise `NULL`. [Int8](../data-types/int-uint.md) / [NULL](../data-types/nullable.md). + +:::note +Function uses [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning it truncates fractional digits of numbers. +::: + +:::danger +An exception is thrown for [NaN and Inf](../data-types/float.md/#data_type-float-nan-inf) arguments. Keep in mind [numeric conversions issues](#common-issues-with-data-conversion), when using this function. +::: **Example** Query: ``` sql -SELECT toInt64OrZero('123123'), toInt8OrZero('123qwe123'); +SELECT toInt8OrNull('-8'), toInt8OrNull('abc'); ``` Result: ```response -┌─toInt64OrZero('123123')─┬─toInt8OrZero('123qwe123')─┐ -│ 123123 │ 0 │ -└─────────────────────────┴───────────────────────────┘ + ┌─toInt8OrNull('-8')─┬─toInt8OrNull('abc')─┐ +1. │ -8 │ ᴺᵁᴸᴸ │ + └────────────────────┴─────────────────────┘ ``` -## toInt(8\|16\|32\|64\|128\|256)OrNull +**See also** -It takes an argument of type String and tries to parse it into Int (8 \| 16 \| 32 \| 64 \| 128 \| 256). If unsuccessful, returns `NULL`. +- [`toInt8`](#toint8). +- [`toInt8OrZero`](#toint8orzero). +- [`toInt8OrDefault`](#toint8ordefault). + +## toInt8OrDefault + +Like [`toInt8`](#toint8), takes an argument of type [String](../data-types/string.md) and tries to parse it to type `Int8`. If unsuccessful, returns the default type value. + +**Syntax** + +```sql +toInt8OrDefault(expr, def) +``` + +**Arguments** + +- `expr` — Expression returning a number or a string with the decimal representation of a number. [Expression](../syntax.md/#syntax-expressions) / [String](../data-types/string.md). +- `def` — The default value to return if parsing to type `Int8` is unsuccessful. [Int8](../data-types/int-uint.md). + +:::note +Binary, octal, and hexadecimal representations of numbers are not supported. Leading zeroes are stripped. +::: + +**Returned value** + +- 8-bit integer value if successful, otherwise returns the default value. [Int8](../data-types/int-uint.md). + +:::note +- Function uses [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning it truncates fractional digits of numbers. +- The default value type should be the same as the cast type. +::: + +:::danger +An exception is thrown for [NaN and Inf](../data-types/float.md/#data_type-float-nan-inf) arguments. Keep in mind [numeric conversions issues](#common-issues-with-data-conversion), when using this function. +::: **Example** Query: ``` sql -SELECT toInt64OrNull('123123'), toInt8OrNull('123qwe123'); +SELECT + toInt8OrDefault('-8', CAST('-1', 'Int8')), + toInt8OrDefault('abc', CAST('-1', 'Int8')); ``` Result: ```response -┌─toInt64OrNull('123123')─┬─toInt8OrNull('123qwe123')─┐ -│ 123123 │ ᴺᵁᴸᴸ │ -└─────────────────────────┴───────────────────────────┘ + ┌─toInt8OrDefault('-8', CAST('-1', 'Int8'))─┬─toInt8OrDefault('abc', CAST('-1', 'Int8'))─┐ +1. 
│ -8 │ -1 │ + └───────────────────────────────────────────┴────────────────────────────────────────────┘ ``` -## toInt(8\|16\|32\|64\|128\|256)OrDefault +**See also** -It takes an argument of type String and tries to parse it into Int (8 \| 16 \| 32 \| 64 \| 128 \| 256). If unsuccessful, returns the default type value. +- [`toInt8`](#toint8). +- [`toInt8OrZero`](#toint8orzero). +- [`toInt8OrNull`](#toint8orNull). + +## toInt16 + +Converts an input value to a value of type `Int16`. + +**Syntax** + +```sql +toInt16(expr) +``` + +**Arguments** + +- `expr` — Expression returning a number or a string with the decimal representation of a number. [Expression](../syntax.md/#syntax-expressions). + +:::note +Binary, octal, and hexadecimal representations of numbers are not supported. Leading zeroes are stripped. +::: + +**Returned value** + +- 16-bit integer value. [Int16](../data-types/int-uint.md). + +:::note +Function uses [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning it truncates fractional digits of numbers. +::: + +:::danger +An exception is thrown for [NaN and Inf](../data-types/float.md/#data_type-float-nan-inf) arguments. Keep in mind [numeric conversions issues](#common-issues-with-data-conversion), when using this function. +::: + +**Example** + +Query: + +```sql +SELECT + toInt16(-16), + toInt16(-16.16), + toInt16('-16'); +``` + +Result: + +```response + ┌─toInt16(-16)─┬─toInt16(-16.16)─┬─toInt16('-16')─┐ +1. │ -16 │ -16 │ -16 │ + └──────────────┴─────────────────┴────────────────┘ +``` + +**See also** + +- [`toInt16OrZero`](#toint16orzero). +- [`toInt16OrNull`](#toint16ornull). +- [`toInt16OrDefault`](#toint16ordefault). + +## toInt16OrZero + +Like [`toInt16`](#toint16), takes an argument of type [String](../data-types/string.md) and tries to parse it to type `Int16`. If unsuccessful, returns `0`. + +**Syntax** + +```sql +toInt16OrZero(expr) +``` + +**Arguments** + +- `expr` — Expression returning a number or a string with the decimal representation of a number. [Expression](../syntax.md/#syntax-expressions) / [String](../data-types/string.md). + +:::note +Binary, octal, and hexadecimal representations of numbers are not supported. Leading zeroes are stripped. +::: + +**Returned value** + +- 16-bit integer value if successful, otherwise `0`. [Int16](../data-types/int-uint.md). + +:::note +Function uses [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning it truncates fractional digits of numbers. +::: + +:::danger +An exception is thrown for [NaN and Inf](../data-types/float.md/#data_type-float-nan-inf) arguments. Keep in mind [numeric conversions issues](#common-issues-with-data-conversion), when using this function. +::: **Example** Query: ``` sql -SELECT toInt64OrDefault('123123', cast('-1' as Int64)), toInt8OrDefault('123qwe123', cast('-1' as Int8)); +SELECT + toInt16OrZero('-16'), + toInt16OrZero('abc'); ``` Result: ```response -┌─toInt64OrDefault('123123', CAST('-1', 'Int64'))─┬─toInt8OrDefault('123qwe123', CAST('-1', 'Int8'))─┐ -│ 123123 │ -1 │ -└─────────────────────────────────────────────────┴──────────────────────────────────────────────────┘ + ┌─toInt16OrZero('-16')─┬─toInt16OrZero('abc')─┐ +1. │ -16 │ 0 │ + └──────────────────────┴──────────────────────┘ ``` +**See also** + +- [`toInt16`](#toint16). +- [`toInt16OrNull`](#toint16ornull). +- [`toInt16OrDefault`](#toint16ordefault). 
+ +## toInt16OrNull + +Like [`toInt16`](#toint16), takes an argument of type [String](../data-types/string.md) and tries to parse it to type `Int16`. If unsuccessful, returns `NULL`. + +**Syntax** + +```sql +toInt16OrNull(expr) +``` + +**Arguments** + +- `expr` — Expression returning a number or a string with the decimal representation of a number. [Expression](../syntax.md/#syntax-expressions) / [String](../data-types/string.md). + +:::note +Binary, octal, and hexadecimal representations of numbers are not supported. Leading zeroes are stripped. +::: + +**Returned value** + +- 16-bit integer value if successful, otherwise `NULL`. [Int16](../data-types/int-uint.md) / [NULL](../data-types/nullable.md). + +:::note +Function uses [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning it truncates fractional digits of numbers. +::: + +:::danger +An exception is thrown for [NaN and Inf](../data-types/float.md/#data_type-float-nan-inf) arguments. Keep in mind [numeric conversions issues](#common-issues-with-data-conversion), when using this function. +::: + +**Example** + +Query: + +``` sql +SELECT + toInt16OrNull('-16'), + toInt16OrNull('abc'); +``` + +Result: + +```response + ┌─toInt16OrNull('-16')─┬─toInt16OrNull('abc')─┐ +1. │ -16 │ ᴺᵁᴸᴸ │ + └──────────────────────┴──────────────────────┘ +``` + +**See also** + +- [`toInt16`](#toint16). +- [`toInt16OrZero`](#toint16orzero). +- [`toInt16OrDefault`](#toint16ordefault). + +## toInt16OrDefault + +Like [`toInt16`](#toint16), takes an argument of type [String](../data-types/string.md) and tries to parse it to type `Int16`. If unsuccessful, returns the default type value. + +**Syntax** + +```sql +toInt16OrDefault(expr, def) +``` + +**Arguments** + +- `expr` — Expression returning a number or a string with the decimal representation of a number. [Expression](../syntax.md/#syntax-expressions) / [String](../data-types/string.md). +- `def` — The default value to return if parsing to type `Int16` is unsuccessful. [Int8](../data-types/int-uint.md). + +:::note +Binary, octal, and hexadecimal representations of numbers are not supported. Leading zeroes are stripped. +::: + +**Returned value** + +- 16-bit integer value if successful, otherwise returns the default value. [Int16](../data-types/int-uint.md). + +:::note +- Function uses [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning it truncates fractional digits of numbers. +- The default value type should be the same as the cast type. +::: + +:::danger +An exception is thrown for [NaN and Inf](../data-types/float.md/#data_type-float-nan-inf) arguments. Keep in mind [numeric conversions issues](#common-issues-with-data-conversion), when using this function. +::: + +**Example** + +Query: + +``` sql +SELECT toInt16OrDefault('-16', cast('-1' as Int16)), toInt16OrDefault('abc', cast('-1' as Int16)); +``` + +Result: + +```response + ┌─toInt16OrDefault('-16', CAST('-1', 'Int16'))─┬─toInt16OrDefault('abc', CAST('-1', 'Int16'))─┐ +1. │ -16 │ -1 │ + └──────────────────────────────────────────────┴──────────────────────────────────────────────┘ +``` + +**See also** + +- [`toInt16`](#toint16). +- [`toInt16OrZero`](#toint16orzero). +- [`toInt16OrNull`](#toint16ornull). + +## toInt32 + +Converts an input value to a value of type `Int32`. + +**Syntax** + +```sql +toInt32(expr) +``` + +**Arguments** + +- `expr` — Expression returning a number or a string with the decimal representation of a number. 
[Expression](../syntax.md/#syntax-expressions).
+
+:::note
+Binary, octal, and hexadecimal representations of numbers are not supported. Leading zeroes are stripped.
+:::
+
+**Returned value**
+
+- 32-bit integer value. [Int32](../data-types/int-uint.md).
+
+:::note
+Function uses [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning it truncates fractional digits of numbers.
+:::
+
+:::danger
+An exception is thrown for [NaN and Inf](../data-types/float.md/#data_type-float-nan-inf) arguments. Keep in mind [numeric conversions issues](#common-issues-with-data-conversion), when using this function.
+:::
+
+**Example**
+
+Query:
+
+```sql
+SELECT
+    toInt32(-32),
+    toInt32(-32.32),
+    toInt32('-32');
+```
+
+Result:
+
+```response
+   ┌─toInt32(-32)─┬─toInt32(-32.32)─┬─toInt32('-32')─┐
+1. │          -32 │             -32 │            -32 │
+   └──────────────┴─────────────────┴────────────────┘
+```
+
+**See also**
+
+- [`toInt32OrZero`](#toint32orzero).
+- [`toInt32OrNull`](#toint32ornull).
+- [`toInt32OrDefault`](#toint32ordefault).
+
+## toInt32OrZero
+
+Like [`toInt32`](#toint32), takes an argument of type [String](../data-types/string.md) and tries to parse it to type `Int32`. If unsuccessful, returns `0`.
+
+**Syntax**
+
+```sql
+toInt32OrZero(expr)
+```
+
+**Arguments**
+
+- `expr` — Expression returning a number or a string with the decimal representation of a number. [Expression](../syntax.md/#syntax-expressions) / [String](../data-types/string.md).
+
+:::note
+Binary, octal, and hexadecimal representations of numbers are not supported. Leading zeroes are stripped.
+:::
+
+**Returned value**
+
+- 32-bit integer value if successful, otherwise `0`. [Int32](../data-types/int-uint.md).
+
+:::note
+Function uses [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning it truncates fractional digits of numbers.
+:::
+
+:::danger
+An exception is thrown for [NaN and Inf](../data-types/float.md/#data_type-float-nan-inf) arguments. Keep in mind [numeric conversions issues](#common-issues-with-data-conversion), when using this function.
+:::
+
+**Example**
+
+Query:
+
+``` sql
+SELECT toInt32OrZero('-32'), toInt32OrZero('abc');
+```
+
+Result:
+
+```response
+   ┌─toInt32OrZero('-32')─┬─toInt32OrZero('abc')─┐
+1. │                  -32 │                    0 │
+   └──────────────────────┴──────────────────────┘
+```
+
+**See also**
+
+- [`toInt32`](#toint32).
+- [`toInt32OrNull`](#toint32ornull).
+- [`toInt32OrDefault`](#toint32ordefault).
+
+## toInt32OrNull
+
+Like [`toInt32`](#toint32), takes an argument of type [String](../data-types/string.md) and tries to parse it to type `Int32`. If unsuccessful, returns `NULL`.
+
+**Syntax**
+
+```sql
+toInt32OrNull(expr)
+```
+
+**Arguments**
+
+- `expr` — Expression returning a number or a string with the decimal representation of a number. [Expression](../syntax.md/#syntax-expressions) / [String](../data-types/string.md).
+
+:::note
+Binary, octal, and hexadecimal representations of numbers are not supported. Leading zeroes are stripped.
+:::
+
+**Returned value**
+
+- 32-bit integer value if successful, otherwise `NULL`. [Int32](../data-types/int-uint.md) / [NULL](../data-types/nullable.md).
+
+:::note
+Function uses [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning it truncates fractional digits of numbers.
+:::
+
+:::danger
+An exception is thrown for [NaN and Inf](../data-types/float.md/#data_type-float-nan-inf) arguments. Keep in mind [numeric conversions issues](#common-issues-with-data-conversion), when using this function.
+:::
+
+**Example**
+
+Query:
+
+``` sql
+SELECT toInt32OrNull('-32'), toInt32OrNull('abc');
+```
+
+Result:
+
+```response
+   ┌─toInt32OrNull('-32')─┬─toInt32OrNull('abc')─┐
+1. │                  -32 │                 ᴺᵁᴸᴸ │
+   └──────────────────────┴──────────────────────┘
+```
+
+**See also**
+
+- [`toInt32`](#toint32).
+- [`toInt32OrZero`](#toint32orzero).
+- [`toInt32OrDefault`](#toint32ordefault).
+
+## toInt32OrDefault
+
+Like [`toInt32`](#toint32), takes an argument of type [String](../data-types/string.md) and tries to parse it to type `Int32`. If unsuccessful, returns the default type value.
+
+**Syntax**
+
+```sql
+toInt32OrDefault(expr, def)
+```
+
+**Arguments**
+
+- `expr` — Expression returning a number or a string with the decimal representation of a number. [Expression](../syntax.md/#syntax-expressions) / [String](../data-types/string.md).
+- `def` — The default value to return if parsing to type `Int32` is unsuccessful. [Int32](../data-types/int-uint.md).
+
+:::note
+Binary, octal, and hexadecimal representations of numbers are not supported. Leading zeroes are stripped.
+:::
+
+**Returned value**
+
+- 32-bit integer value if successful, otherwise returns the default value. [Int32](../data-types/int-uint.md).
+
+:::note
+- Function uses [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning it truncates fractional digits of numbers.
+- The default value type should be the same as the cast type.
+:::
+
+:::danger
+An exception is thrown for [NaN and Inf](../data-types/float.md/#data_type-float-nan-inf) arguments. Keep in mind [numeric conversions issues](#common-issues-with-data-conversion), when using this function.
+:::
+
+**Example**
+
+Query:
+
+``` sql
+SELECT toInt32OrDefault('-32', cast('-1' as Int32)), toInt32OrDefault('abc', cast('-1' as Int32));
+```
+
+Result:
+
+```response
+   ┌─toInt32OrDefault('-32', CAST('-1', 'Int32'))─┬─toInt32OrDefault('abc', CAST('-1', 'Int32'))─┐
+1. │                                          -32 │                                           -1 │
+   └──────────────────────────────────────────────┴──────────────────────────────────────────────┘
+```
+
+**See also**
+
+- [`toInt32`](#toint32).
+- [`toInt32OrZero`](#toint32orzero).
+- [`toInt32OrNull`](#toint32ornull).
+
+## toInt64
+
+Converts an input value to a value of type `Int64`.
+
+**Syntax**
+
+```sql
+toInt64(expr)
+```
+
+**Arguments**
+
+- `expr` — Expression returning a number or a string with the decimal representation of a number. [Expression](../syntax.md/#syntax-expressions).
+
+:::note
+Binary, octal, and hexadecimal representations of numbers are not supported. Leading zeroes are stripped.
+:::
+
+**Returned value**
+
+- 64-bit integer value. [Int64](../data-types/int-uint.md).
+
+:::note
+Function uses [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning it truncates fractional digits of numbers.
+:::
+
+:::danger
+An exception is thrown for [NaN and Inf](../data-types/float.md/#data_type-float-nan-inf) arguments. Keep in mind [numeric conversions issues](#common-issues-with-data-conversion), when using this function.
+:::
+
+**Example**
+
+Query:
+
+```sql
+SELECT
+    toInt64(-64),
+    toInt64(-64.64),
+    toInt64('-64');
+```
+
+Result:
+
+```response
+   ┌─toInt64(-64)─┬─toInt64(-64.64)─┬─toInt64('-64')─┐
+1. │          -64 │             -64 │            -64 │
+   └──────────────┴─────────────────┴────────────────┘
+```
+
+**See also**
+
+- [`toInt64OrZero`](#toint64orzero).
+- [`toInt64OrNull`](#toint64ornull).
+- [`toInt64OrDefault`](#toint64ordefault).
+
+## toInt64OrZero
+
+Like [`toInt64`](#toint64), takes an argument of type [String](../data-types/string.md) and tries to parse it to type `Int64`. If unsuccessful, returns `0`.
+
+**Syntax**
+
+```sql
+toInt64OrZero(expr)
+```
+
+**Arguments**
+
+- `expr` — Expression returning a number or a string with the decimal representation of a number. [Expression](../syntax.md/#syntax-expressions) / [String](../data-types/string.md).
+
+:::note
+Binary, octal, and hexadecimal representations of numbers are not supported. Leading zeroes are stripped.
+:::
+
+**Returned value**
+
+- 64-bit integer value if successful, otherwise `0`. [Int64](../data-types/int-uint.md).
+
+:::note
+Function uses [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning it truncates fractional digits of numbers.
+:::
+
+:::danger
+An exception is thrown for [NaN and Inf](../data-types/float.md/#data_type-float-nan-inf) arguments. Keep in mind [numeric conversions issues](#common-issues-with-data-conversion), when using this function.
+:::
+
+**Example**
+
+Query:
+
+``` sql
+SELECT
+    toInt64OrZero('-64'),
+    toInt64OrZero('abc');
+```
+
+Result:
+
+```response
+   ┌─toInt64OrZero('-64')─┬─toInt64OrZero('abc')─┐
+1. │                  -64 │                    0 │
+   └──────────────────────┴──────────────────────┘
+```
+
+**See also**
+
+- [`toInt64`](#toint64).
+- [`toInt64OrNull`](#toint64ornull).
+- [`toInt64OrDefault`](#toint64ordefault).
+
+## toInt64OrNull
+
+Like [`toInt64`](#toint64), takes an argument of type [String](../data-types/string.md) and tries to parse it to type `Int64`. If unsuccessful, returns `NULL`.
+
+**Syntax**
+
+```sql
+toInt64OrNull(expr)
+```
+
+**Arguments**
+
+- `expr` — Expression returning a number or a string with the decimal representation of a number. [Expression](../syntax.md/#syntax-expressions) / [String](../data-types/string.md).
+
+:::note
+Binary, octal, and hexadecimal representations of numbers are not supported. Leading zeroes are stripped.
+:::
+
+**Returned value**
+
+- Integer value of type `Int64` if successful, otherwise `NULL`. [Int64](../data-types/int-uint.md) / [NULL](../data-types/nullable.md).
+
+:::note
+Function uses [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning it truncates fractional digits of numbers.
+:::
+
+:::danger
+An exception is thrown for [NaN and Inf](../data-types/float.md/#data_type-float-nan-inf) arguments. Keep in mind [numeric conversions issues](#common-issues-with-data-conversion), when using this function.
+:::
+
+**Example**
+
+Query:
+
+``` sql
+SELECT
+    toInt64OrNull('-64'),
+    toInt64OrNull('abc');
+```
+
+Result:
+
+```response
+   ┌─toInt64OrNull('-64')─┬─toInt64OrNull('abc')─┐
+1. │                  -64 │                 ᴺᵁᴸᴸ │
+   └──────────────────────┴──────────────────────┘
+```
+
+**See also**
+
+- [`toInt64`](#toint64).
+- [`toInt64OrZero`](#toint64orzero).
+- [`toInt64OrDefault`](#toint64ordefault).
+
+## toInt64OrDefault
+
+Like [`toInt64`](#toint64), takes an argument of type [String](../data-types/string.md) and tries to parse it to type `Int64`. If unsuccessful, returns the default type value.
+
+**Syntax**
+
+```sql
+toInt64OrDefault(expr, def)
+```
+
+**Arguments**
+
+- `expr` — Expression returning a number or a string with the decimal representation of a number. [Expression](../syntax.md/#syntax-expressions) / [String](../data-types/string.md).
+- `def` — The default value to return if parsing to type `Int64` is unsuccessful.
[Int64](../data-types/int-uint.md). + +:::note +Binary, octal, and hexadecimal representations of numbers are not supported. Leading zeroes are stripped. +::: + +**Returned value** + +- Integer value of type `Int64` if successful, otherwise returns the default value. [Int64](../data-types/int-uint.md). + +:::note +- Function uses [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning it truncates fractional digits of numbers. +- The default value type should be the same as the cast type. + ::: + +:::danger +An exception is thrown for [NaN and Inf](../data-types/float.md/#data_type-float-nan-inf) arguments. Keep in mind [numeric conversions issues](#common-issues-with-data-conversion), when using this function. +::: + +**Example** + +Query: + +``` sql +SELECT + toInt64OrDefault('-64', CAST('-1', 'Int64')), + toInt64OrDefault('abc', CAST('-1', 'Int64')); +``` + +Result: + +```response + ┌─toInt64OrDefault('-64', CAST('-1', 'Int64'))─┬─toInt64OrDefault('abc', CAST('-1', 'Int64'))─┐ +1. │ -64 │ -1 │ + └──────────────────────────────────────────────┴──────────────────────────────────────────────┘ +``` + +**See also** + +- [`toInt64`](#toint64). +- [`toInt64OrZero`](#toint64orzero). +- [`toInt64OrNull`](#toint64ornull). + +## toInt128 + +Converts an input value to a value of type `Int128`. + +**Syntax** + +```sql +toInt128(expr) +``` + +**Arguments** + +- `expr` — Expression returning a number or a string with the decimal representation of a number. [Expression](../syntax.md/#syntax-expressions). + +:::note +Binary, octal, and hexadecimal representations of numbers are not supported. Leading zeroes are stripped. +::: + +**Returned value** + +- 128-bit integer value. [Int128](../data-types/int-uint.md). + +:::note +Function uses [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning it truncates fractional digits of numbers. +::: + +:::danger +An exception is thrown for [NaN and Inf](../data-types/float.md/#data_type-float-nan-inf) arguments. Keep in mind [numeric conversions issues](#common-issues-with-data-conversion), when using this function. +::: + +**Example** + +Query: + +```sql +SELECT + toInt128(-128), + toInt128(-128.8), + toInt128('-128'), +``` + +Result: + +```response + ┌─toInt128(-128)─┬─toInt128(-128.8)─┬─toInt128('-128')─┐ +1. │ -128 │ -128 │ -128 │ + └────────────────┴──────────────────┴──────────────────┘ +``` + +**See also** + +- [`toInt128OrZero`](#toint128orzero). +- [`toInt128OrNull`](#toint128ornull). +- [`toInt128OrDefault`](#toint128ordefault). + +## toInt128OrZero + +Like [`toInt128`](#toint128), takes an argument of type [String](../data-types/string.md) and tries to parse it to type `Int128`. If unsuccessful, returns `0`. + +**Syntax** + +```sql +toInt128OrZero(expr) +``` + +**Arguments** + +- `expr` — Expression returning a number or a string with the decimal representation of a number. [Expression](../syntax.md/#syntax-expressions) / [String](../data-types/string.md). + +:::note +Binary, octal, and hexadecimal representations of numbers are not supported. Leading zeroes are stripped. +::: + +**Returned value** + +- 128-bit integer value if successful, otherwise `0`. [Int128](../data-types/int-uint.md). + +:::note +Function uses [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning it truncates fractional digits of numbers. +::: + +:::danger +An exception is thrown for [NaN and Inf](../data-types/float.md/#data_type-float-nan-inf) arguments. 
Keep in mind [numeric conversions issues](#common-issues-with-data-conversion), when using this function. +::: + +**Example** + +Query: + +``` sql +SELECT + toInt128OrZero('-128'), + toInt128OrZero('abc'); +``` + +Result: + +```response + ┌─toInt128OrZero('-128')─┬─toInt128OrZero('abc')─┐ +1. │ -128 │ 0 │ + └────────────────────────┴───────────────────────┘ +``` + +**See also** + +- [`toInt128`](#toint128). +- [`toInt128OrNull`](#toint128ornull). +- [`toInt128OrDefault`](#toint128ordefault). + +## toInt128OrNull + +Like [`toInt128`](#toint128), takes an argument of type [String](../data-types/string.md) and tries to parse it to type `Int128`. If unsuccessful, returns `NULL`. + +**Syntax** + +```sql +toInt128OrNull(expr) +``` + +**Arguments** + +- `expr` — Expression returning a number or a string with the decimal representation of a number. [Expression](../syntax.md/#syntax-expressions) / [String](../data-types/string.md). + +:::note +Binary, octal, and hexadecimal representations of numbers are not supported. Leading zeroes are stripped. +::: + +**Returned value** + +- 128-bit integer value if successful, otherwise `NULL`. [Int128](../data-types/int-uint.md) / [NULL](../data-types/nullable.md). + +:::note +Function uses [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning it truncates fractional digits of numbers. +::: + +:::danger +An exception is thrown for [NaN and Inf](../data-types/float.md/#data_type-float-nan-inf) arguments. Keep in mind [numeric conversions issues](#common-issues-with-data-conversion), when using this function. +::: + +**Example** + +Query: + +``` sql +SELECT + toInt128OrNull('-128'), + toInt128OrNull('abc'); +``` + +Result: + +```response + ┌─toInt128OrNull('-128')─┬─toInt128OrNull('abc')─┐ +1. │ -128 │ ᴺᵁᴸᴸ │ + └────────────────────────┴───────────────────────┘ +``` + +**See also** + +- [`toInt128`](#toint128). +- [`toInt128OrZero`](#toint128orzero). +- [`toInt128OrDefault`](#toint128ordefault). + +## toInt128OrDefault + +Like [`toInt128`](#toint128), takes an argument of type [String](../data-types/string.md) and tries to parse it to type `Int128`. If unsuccessful, returns the default type value. + +**Syntax** + +```sql +toInt128OrDefault(expr, def) +``` + +**Arguments** + +- `expr` — Expression returning a number or a string with the decimal representation of a number. [Expression](../syntax.md/#syntax-expressions) / [String](../data-types/string.md). +- `def` — The default value to return if parsing to type `Int128` is unsuccessful. [Int128](../data-types/int-uint.md). + +:::note +Binary, octal, and hexadecimal representations of numbers are not supported. Leading zeroes are stripped. +::: + +**Returned value** + +- 128-bit integer value if successful, otherwise returns the default value. [Int128](../data-types/int-uint.md). + +:::note +- Function uses [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning it truncates fractional digits of numbers. +- The default value type should be the same as the cast type. +::: + +:::danger +An exception is thrown for [NaN and Inf](../data-types/float.md/#data_type-float-nan-inf) arguments. Keep in mind [numeric conversions issues](#common-issues-with-data-conversion), when using this function. 
+::: + +**Example** + +Query: + +``` sql +SELECT + toInt128OrDefault('-128', CAST('-1', 'Int128')), + toInt128OrDefault('abc', CAST('-1', 'Int128')); +``` + +Result: + +```response + ┌─toInt128OrDefault('-128', CAST('-1', 'Int128'))─┬─toInt128OrDefault('abc', CAST('-1', 'Int128'))─┐ +1. │ -128 │ -1 │ + └─────────────────────────────────────────────────┴────────────────────────────────────────────────┘ +``` + +**See also** + +- [`toInt128`](#toint128). +- [`toInt128OrZero`](#toint128orzero). +- [`toInt128OrNull`](#toint128ornull). + +## toInt256 + +Converts an input value to a value of type `Int256`. + +**Syntax** + +```sql +toInt256(expr) +``` + +**Arguments** + +- `expr` — Expression returning a number or a string with the decimal representation of a number. [Expression](../syntax.md/#syntax-expressions). + +:::note +Binary, octal, and hexadecimal representations of numbers are not supported. Leading zeroes are stripped. +::: + +**Returned value** + +- 256-bit integer value. [Int256](../data-types/int-uint.md). + +:::note +Function uses [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning it truncates fractional digits of numbers. +::: + +:::danger +An exception is thrown for [NaN and Inf](../data-types/float.md/#data_type-float-nan-inf) arguments. Keep in mind [numeric conversions issues](#common-issues-with-data-conversion), when using this function. +::: + +**Example** + +Query: + +```sql +SELECT + toInt256(-256), + toInt256(-256.256), + toInt256('-256'); +``` + +Result: + +```response + ┌─toInt256(-256)─┬─toInt256(-256.256)─┬─toInt256('-256')─┐ +1. │ -256 │ -256 │ -256 │ + └────────────────┴────────────────────┴──────────────────┘ +``` + +**See also** + +- [`toInt256OrZero`](#toint256orzero). +- [`toInt256OrNull`](#toint256ornull). +- [`toInt256OrDefault`](#toint256ordefault). + +## toInt256OrZero + +Like [`toInt256`](#toint256), takes an argument of type [String](../data-types/string.md) and tries to parse it to type `Int256`. If unsuccessful, returns `0`. + +**Syntax** + +```sql +toInt256OrZero(expr) +``` + +**Arguments** + +- `expr` — Expression returning a number or a string with the decimal representation of a number. [Expression](../syntax.md/#syntax-expressions) / [String](../data-types/string.md). + +:::note +Binary, octal, and hexadecimal representations of numbers are not supported. Leading zeroes are stripped. +::: + +**Returned value** + +- 256-bit integer value if successful, otherwise `0`. [Int256](../data-types/int-uint.md). + +:::note +Functions uses [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning it truncates fractional digits of numbers. +::: + +:::danger +An exception is thrown for [NaN and Inf](../data-types/float.md/#data_type-float-nan-inf) arguments. Keep in mind [numeric conversions issues](#common-issues-with-data-conversion), when using this function. +::: + +**Example** + +Query: + +``` sql +SELECT + toInt256OrZero('-256'), + toInt256OrZero('abc'); +``` + +Result: + +```response + ┌─toInt256OrZero('-256')─┬─toInt256OrZero('abc')─┐ +1. │ -256 │ 0 │ + └────────────────────────┴───────────────────────┘ +``` + +**See also** + +- [`toInt256`](#toint256). +- [`toInt256OrNull`](#toint256ornull). +- [`toInt256OrDefault`](#toint256ordefault). + +## toInt256OrNull + +Like [`toInt256`](#toint256), takes an argument of type [String](../data-types/string.md) and tries to parse it to type `Int256`. If unsuccessful, returns `NULL`. 
+ +**Syntax** + +```sql +toInt256OrNull(expr) +``` + +**Arguments** + +- `expr` — Expression returning a number or a string with the decimal representation of a number. [Expression](../syntax.md/#syntax-expressions) / [String](../data-types/string.md). + +:::note +Binary, octal, and hexadecimal representations of numbers are not supported. Leading zeroes are stripped. +::: + +**Returned value** + +- 256-bit integer value if successful, otherwise `NULL`. [Int256](../data-types/int-uint.md) / [NULL](../data-types/nullable.md). + +:::note +Function uses [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning it truncates fractional digits of numbers. +::: + +:::danger +An exception is thrown for [NaN and Inf](../data-types/float.md/#data_type-float-nan-inf) arguments. Keep in mind [numeric conversions issues](#common-issues-with-data-conversion), when using this function. +::: + +**Example** + +Query: + +``` sql +SELECT + toInt256OrNull('-256'), + toInt256OrNull('abc'); +``` + +Result: + +```response + ┌─toInt256OrNull('-256')─┬─toInt256OrNull('abc')─┐ +1. │ -256 │ ᴺᵁᴸᴸ │ + └────────────────────────┴───────────────────────┘ +``` + +**See also** + +- [`toInt256`](#toint256). +- [`toInt256OrZero`](#toint256orzero). +- [`toInt256OrDefault`](#toint256ordefault). + +## toInt256OrDefault + +Like [`toInt256`](#toint256), takes an argument of type [String](../data-types/string.md) and tries to parse it to type `Int256`. If unsuccessful, returns the default type value. + +**Syntax** + +```sql +toInt256OrDefault(expr, def) +``` + +**Arguments** + +- `expr` — Expression returning a number or a string with the decimal representation of a number. [Expression](../syntax.md/#syntax-expressions) / [String](../data-types/string.md). +- `def` — The default value to return if parsing to type `Int256` is unsuccessful. [Int256](../data-types/int-uint.md). + +:::note +Binary, octal, and hexadecimal representations of numbers are not supported. Leading zeroes are stripped. +::: + +**Returned value** + +- 256-bit integer value if successful, otherwise returns the default value. [Int256](../data-types/int-uint.md). + +:::note +- Function uses [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning it truncates fractional digits of numbers. +- The default value type should be the same as the cast type. +::: + +:::danger +An exception is thrown for [NaN and Inf](../data-types/float.md/#data_type-float-nan-inf) arguments. Keep in mind [numeric conversions issues](#common-issues-with-data-conversion), when using this function. +::: + +**Example** + +Query: + +``` sql +SELECT + toInt256OrDefault('-256', CAST('-1', 'Int256')), + toInt256OrDefault('abc', CAST('-1', 'Int256')); +``` + +Result: + +```response + ┌─toInt256OrDefault('-256', CAST('-1', 'Int256'))─┬─toInt256OrDefault('abc', CAST('-1', 'Int256'))─┐ +1. │ -256 │ -1 │ + └─────────────────────────────────────────────────┴────────────────────────────────────────────────┘ +``` + +**See also** + +- [`toInt256`](#toint256). +- [`toInt256OrZero`](#toint256orzero). +- [`toInt256OrNull`](#toint256ornull). ## toUInt(8\|16\|32\|64\|256) @@ -167,7 +1370,7 @@ Converts an input value to the [UInt](../data-types/int-uint.md) data type. This - Integer value in the `UInt8`, `UInt16`, `UInt32`, `UInt64` or `UInt256` data type. -Functions use [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning they truncate fractional digits of numbers. 
+Functions use [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning it truncates fractional digits of numbers. The behavior of functions for negative arguments and for the [NaN and Inf](../data-types/float.md/#data_type-float-nan-inf) arguments is undefined. If you pass a string with a negative number, for example `'-32'`, ClickHouse raises an exception. Remember about [numeric conversions issues](#common-issues-with-data-conversion), when using the functions. @@ -2289,7 +3492,7 @@ Result: └─────────────────────┴─────────────────┴─────────────────────────────────────┘ ``` -**See Also** +**See also** - [RFC 1123](https://datatracker.ietf.org/doc/html/rfc1123) - [toDate](#todate) From 80e1377e5d2223176274c319938187f0da799280 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Wed, 17 Jul 2024 17:30:28 +0000 Subject: [PATCH 062/661] Fixing build. --- src/Storages/System/StorageSystemTables.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/System/StorageSystemTables.cpp b/src/Storages/System/StorageSystemTables.cpp index 43b761d84b1..d6b577bf6c8 100644 --- a/src/Storages/System/StorageSystemTables.cpp +++ b/src/Storages/System/StorageSystemTables.cpp @@ -125,7 +125,7 @@ ColumnPtr getFilteredTables( block.insert(ColumnWithTypeAndName(std::move(engine_column), std::make_shared(), "engine")); if (dag) - VirtualColumnUtils::filterBlockWithDAG(dag, block, context); + VirtualColumnUtils::filterBlockWithDAG(std::move(*dag), block, context); return block.getByPosition(0).column; } From 55355f43ad420456467121ce43072a10791c5cc8 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 17 Jul 2024 05:19:58 +0200 Subject: [PATCH 063/661] Fix bad code: it was catching exceptions --- src/IO/WithFileSize.cpp | 48 +++++++++---------- ...ry_and_native_with_binary_encoded_types.sh | 4 +- 2 files changed, 25 insertions(+), 27 deletions(-) diff --git a/src/IO/WithFileSize.cpp b/src/IO/WithFileSize.cpp index 3660d962c08..8cea12fa200 100644 --- a/src/IO/WithFileSize.cpp +++ b/src/IO/WithFileSize.cpp @@ -14,40 +14,38 @@ namespace ErrorCodes } template -static size_t getFileSize(T & in) +static std::optional tryGetFileSize(T & in) { if (auto * with_file_size = dynamic_cast(&in)) - { return with_file_size->getFileSize(); - } + + return std::nullopt; +} + +template +static size_t getFileSize(T & in) +{ + if (auto maybe_size = tryGetFileSize(in)) + return *maybe_size; throw Exception(ErrorCodes::UNKNOWN_FILE_SIZE, "Cannot find out file size"); } -size_t getFileSizeFromReadBuffer(ReadBuffer & in) -{ - if (auto * delegate = dynamic_cast(&in)) - { - return getFileSize(delegate->getWrappedReadBuffer()); - } - else if (auto * compressed = dynamic_cast(&in)) - { - return getFileSize(compressed->getWrappedReadBuffer()); - } - - return getFileSize(in); -} - std::optional tryGetFileSizeFromReadBuffer(ReadBuffer & in) { - try - { - return getFileSizeFromReadBuffer(in); - } - catch (...) 
- { - return std::nullopt; - } + if (auto * delegate = dynamic_cast(&in)) + return tryGetFileSize(delegate->getWrappedReadBuffer()); + else if (auto * compressed = dynamic_cast(&in)) + return tryGetFileSize(compressed->getWrappedReadBuffer()); + return tryGetFileSize(in); +} + +size_t getFileSizeFromReadBuffer(ReadBuffer & in) +{ + if (auto maybe_size = tryGetFileSizeFromReadBuffer(in)) + return *maybe_size; + + throw Exception(ErrorCodes::UNKNOWN_FILE_SIZE, "Cannot find out file size"); } bool isBufferWithFileSize(const ReadBuffer & in) diff --git a/tests/queries/0_stateless/03173_row_binary_and_native_with_binary_encoded_types.sh b/tests/queries/0_stateless/03173_row_binary_and_native_with_binary_encoded_types.sh index 723b11ad620..0c585d36348 100755 --- a/tests/queries/0_stateless/03173_row_binary_and_native_with_binary_encoded_types.sh +++ b/tests/queries/0_stateless/03173_row_binary_and_native_with_binary_encoded_types.sh @@ -6,8 +6,8 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) function test { - $CLICKHOUSE_LOCAL --allow_experimental_dynamic_type=1 --allow_experimental_variant_type=1 --output_format_binary_encode_types_in_binary_format=1 -q "select $1 as value format RowBinaryWithNamesAndTypes" | $CLICKHOUSE_LOCAL --input-format RowBinaryWithNamesAndTypes --input_format_binary_decode_types_in_binary_format=1 -q "select value, toTypeName(value) from table" - $CLICKHOUSE_LOCAL --allow_experimental_dynamic_type=1 --allow_experimental_variant_type=1 --output_format_native_encode_types_in_binary_format=1 -q "select $1 as value format Native" | $CLICKHOUSE_LOCAL --input-format Native --input_format_native_decode_types_in_binary_format=1 -q "select value, toTypeName(value) from table" + $CLICKHOUSE_LOCAL --allow_experimental_dynamic_type=1 --allow_experimental_variant_type=1 --output_format_binary_encode_types_in_binary_format=1 -q "select $1 as value format RowBinaryWithNamesAndTypes" | $CLICKHOUSE_LOCAL --input-format RowBinaryWithNamesAndTypes --input_format_binary_decode_types_in_binary_format=1 -q "select value, toTypeName(value) from table" + $CLICKHOUSE_LOCAL --allow_experimental_dynamic_type=1 --allow_experimental_variant_type=1 --output_format_native_encode_types_in_binary_format=1 -q "select $1 as value format Native" | $CLICKHOUSE_LOCAL --input-format Native --input_format_native_decode_types_in_binary_format=1 -q "select value, toTypeName(value) from table" } test "materialize(42)::UInt8" From e0aedb992f647a8dcd226bc8775795ecad91a551 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 17 Jul 2024 05:34:04 +0200 Subject: [PATCH 064/661] Add a test --- .../03206_no_exceptions_clickhouse_local.reference | 1 + .../0_stateless/03206_no_exceptions_clickhouse_local.sh | 9 +++++++++ 2 files changed, 10 insertions(+) create mode 100644 tests/queries/0_stateless/03206_no_exceptions_clickhouse_local.reference create mode 100755 tests/queries/0_stateless/03206_no_exceptions_clickhouse_local.sh diff --git a/tests/queries/0_stateless/03206_no_exceptions_clickhouse_local.reference b/tests/queries/0_stateless/03206_no_exceptions_clickhouse_local.reference new file mode 100644 index 00000000000..11277a62b06 --- /dev/null +++ b/tests/queries/0_stateless/03206_no_exceptions_clickhouse_local.reference @@ -0,0 +1 @@ +Hello world diff --git a/tests/queries/0_stateless/03206_no_exceptions_clickhouse_local.sh b/tests/queries/0_stateless/03206_no_exceptions_clickhouse_local.sh new file mode 100755 index 00000000000..86839a228dc --- /dev/null +++ 
b/tests/queries/0_stateless/03206_no_exceptions_clickhouse_local.sh @@ -0,0 +1,9 @@ +#!/usr/bin/env bash +# Tags: no-fasttest +# Tag no-fasttest: In fasttest, ENABLE_LIBRARIES=0, so the grpc library is not built + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +CLICKHOUSE_TERMINATE_ON_ANY_EXCEPTION=1 ${CLICKHOUSE_LOCAL} --query "SELECT * FROM table" --input-format CSV <<<"Hello, world" From c7be25f0a167c2c5ab6944b47779be2f90af443d Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 18 Jul 2024 04:54:36 +0200 Subject: [PATCH 065/661] Fix everything --- src/Disks/IO/AsynchronousBoundedReadBuffer.h | 2 +- src/Disks/IO/ReadBufferFromAzureBlobStorage.cpp | 2 +- src/Disks/IO/ReadBufferFromAzureBlobStorage.h | 2 +- src/Disks/IO/ReadBufferFromRemoteFSGather.h | 2 +- src/IO/Archives/LibArchiveReader.cpp | 2 +- src/IO/Archives/ZipArchiveReader.cpp | 2 +- src/IO/AsynchronousReadBufferFromFileDescriptor.cpp | 2 +- src/IO/AsynchronousReadBufferFromFileDescriptor.h | 2 +- src/IO/ConcatSeekableReadBuffer.h | 2 +- src/IO/MMapReadBufferFromFileDescriptor.cpp | 2 +- src/IO/MMapReadBufferFromFileDescriptor.h | 2 +- src/IO/ParallelReadBuffer.cpp | 2 +- src/IO/ParallelReadBuffer.h | 2 +- src/IO/ReadBufferFromEmptyFile.h | 2 +- src/IO/ReadBufferFromEncryptedFile.h | 2 +- src/IO/ReadBufferFromFileBase.cpp | 6 ++---- src/IO/ReadBufferFromFileBase.h | 2 +- src/IO/ReadBufferFromFileDecorator.cpp | 4 ++-- src/IO/ReadBufferFromFileDecorator.h | 2 +- src/IO/ReadBufferFromFileDescriptor.cpp | 2 +- src/IO/ReadBufferFromFileDescriptor.h | 2 +- src/IO/ReadBufferFromS3.cpp | 6 +++--- src/IO/ReadBufferFromS3.h | 2 +- src/IO/ReadWriteBufferFromHTTP.cpp | 7 ++----- src/IO/ReadWriteBufferFromHTTP.h | 2 +- src/IO/WithFileSize.cpp | 10 +++++++++- src/IO/WithFileSize.h | 7 ++++--- src/Storages/Cache/ExternalDataSourceCache.h | 2 +- .../HDFS/AsynchronousReadBufferFromHDFS.cpp | 4 ++-- .../HDFS/AsynchronousReadBufferFromHDFS.h | 2 +- src/Storages/ObjectStorage/HDFS/ReadBufferFromHDFS.cpp | 8 ++++---- src/Storages/ObjectStorage/HDFS/ReadBufferFromHDFS.h | 2 +- 32 files changed, 52 insertions(+), 48 deletions(-) diff --git a/src/Disks/IO/AsynchronousBoundedReadBuffer.h b/src/Disks/IO/AsynchronousBoundedReadBuffer.h index 9a802348998..3dc8fcc39cb 100644 --- a/src/Disks/IO/AsynchronousBoundedReadBuffer.h +++ b/src/Disks/IO/AsynchronousBoundedReadBuffer.h @@ -34,7 +34,7 @@ public: String getFileName() const override { return impl->getFileName(); } - size_t getFileSize() override { return impl->getFileSize(); } + std::optional tryGetFileSize() override { return impl->tryGetFileSize(); } String getInfoForLog() override { return impl->getInfoForLog(); } diff --git a/src/Disks/IO/ReadBufferFromAzureBlobStorage.cpp b/src/Disks/IO/ReadBufferFromAzureBlobStorage.cpp index da1ea65f2ea..a36a8b031b4 100644 --- a/src/Disks/IO/ReadBufferFromAzureBlobStorage.cpp +++ b/src/Disks/IO/ReadBufferFromAzureBlobStorage.cpp @@ -253,7 +253,7 @@ void ReadBufferFromAzureBlobStorage::initialize() initialized = true; } -size_t ReadBufferFromAzureBlobStorage::getFileSize() +std::optional ReadBufferFromAzureBlobStorage::tryGetFileSize() { if (!blob_client) blob_client = std::make_unique(blob_container_client->GetBlobClient(path)); diff --git a/src/Disks/IO/ReadBufferFromAzureBlobStorage.h b/src/Disks/IO/ReadBufferFromAzureBlobStorage.h index d328195cc26..f407f27e099 100644 --- a/src/Disks/IO/ReadBufferFromAzureBlobStorage.h +++ b/src/Disks/IO/ReadBufferFromAzureBlobStorage.h @@ 
-42,7 +42,7 @@ public: bool supportsRightBoundedReads() const override { return true; } - size_t getFileSize() override; + std::optional tryGetFileSize() override; size_t readBigAt(char * to, size_t n, size_t range_begin, const std::function & progress_callback) const override; diff --git a/src/Disks/IO/ReadBufferFromRemoteFSGather.h b/src/Disks/IO/ReadBufferFromRemoteFSGather.h index e36365a8174..9f1cb681f1a 100644 --- a/src/Disks/IO/ReadBufferFromRemoteFSGather.h +++ b/src/Disks/IO/ReadBufferFromRemoteFSGather.h @@ -41,7 +41,7 @@ public: void setReadUntilEnd() override { setReadUntilPosition(getFileSize()); } - size_t getFileSize() override { return getTotalSize(blobs_to_read); } + std::optional tryGetFileSize() override { return getTotalSize(blobs_to_read); } size_t getFileOffsetOfBufferEnd() const override { return file_offset_of_buffer_end; } diff --git a/src/IO/Archives/LibArchiveReader.cpp b/src/IO/Archives/LibArchiveReader.cpp index e3fe63fa40d..31bad4d6638 100644 --- a/src/IO/Archives/LibArchiveReader.cpp +++ b/src/IO/Archives/LibArchiveReader.cpp @@ -321,7 +321,7 @@ public: off_t getPosition() override { throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "getPosition not supported when reading from archive"); } String getFileName() const override { return handle.getFileName(); } - size_t getFileSize() override { return handle.getFileInfo().uncompressed_size; } + std::optional tryGetFileSize() override { return handle.getFileInfo().uncompressed_size; } Handle releaseHandle() && { return std::move(handle); } diff --git a/src/IO/Archives/ZipArchiveReader.cpp b/src/IO/Archives/ZipArchiveReader.cpp index 2a9b7a43519..12b07d550c2 100644 --- a/src/IO/Archives/ZipArchiveReader.cpp +++ b/src/IO/Archives/ZipArchiveReader.cpp @@ -317,7 +317,7 @@ public: String getFileName() const override { return handle.getFileName(); } - size_t getFileSize() override { return handle.getFileInfo().uncompressed_size; } + std::optional tryGetFileSize() override { return handle.getFileInfo().uncompressed_size; } /// Releases owned handle to pass it to an enumerator. HandleHolder releaseHandle() && diff --git a/src/IO/AsynchronousReadBufferFromFileDescriptor.cpp b/src/IO/AsynchronousReadBufferFromFileDescriptor.cpp index f8c00d62732..6c4bd09b76f 100644 --- a/src/IO/AsynchronousReadBufferFromFileDescriptor.cpp +++ b/src/IO/AsynchronousReadBufferFromFileDescriptor.cpp @@ -244,7 +244,7 @@ void AsynchronousReadBufferFromFileDescriptor::rewind() file_offset_of_buffer_end = 0; } -size_t AsynchronousReadBufferFromFileDescriptor::getFileSize() +std::optional AsynchronousReadBufferFromFileDescriptor::tryGetFileSize() { return getSizeFromFileDescriptor(fd, getFileName()); } diff --git a/src/IO/AsynchronousReadBufferFromFileDescriptor.h b/src/IO/AsynchronousReadBufferFromFileDescriptor.h index 82659b1aca7..097979fbe00 100644 --- a/src/IO/AsynchronousReadBufferFromFileDescriptor.h +++ b/src/IO/AsynchronousReadBufferFromFileDescriptor.h @@ -68,7 +68,7 @@ public: /// Seek to the beginning, discarding already read data if any. Useful to reread file that changes on every read. 
void rewind(); - size_t getFileSize() override; + std::optional tryGetFileSize() override; size_t getFileOffsetOfBufferEnd() const override { return file_offset_of_buffer_end; } diff --git a/src/IO/ConcatSeekableReadBuffer.h b/src/IO/ConcatSeekableReadBuffer.h index c8c16c5d887..609f0dc25b8 100644 --- a/src/IO/ConcatSeekableReadBuffer.h +++ b/src/IO/ConcatSeekableReadBuffer.h @@ -21,7 +21,7 @@ public: off_t seek(off_t off, int whence) override; off_t getPosition() override; - size_t getFileSize() override { return total_size; } + std::optional tryGetFileSize() override { return total_size; } private: bool nextImpl() override; diff --git a/src/IO/MMapReadBufferFromFileDescriptor.cpp b/src/IO/MMapReadBufferFromFileDescriptor.cpp index f27828f71b2..83dd192de54 100644 --- a/src/IO/MMapReadBufferFromFileDescriptor.cpp +++ b/src/IO/MMapReadBufferFromFileDescriptor.cpp @@ -87,7 +87,7 @@ off_t MMapReadBufferFromFileDescriptor::seek(off_t offset, int whence) return new_pos; } -size_t MMapReadBufferFromFileDescriptor::getFileSize() +std::optional MMapReadBufferFromFileDescriptor::tryGetFileSize() { return getSizeFromFileDescriptor(getFD(), getFileName()); } diff --git a/src/IO/MMapReadBufferFromFileDescriptor.h b/src/IO/MMapReadBufferFromFileDescriptor.h index f774538374a..de44ec3f9d8 100644 --- a/src/IO/MMapReadBufferFromFileDescriptor.h +++ b/src/IO/MMapReadBufferFromFileDescriptor.h @@ -38,7 +38,7 @@ public: int getFD() const; - size_t getFileSize() override; + std::optional tryGetFileSize() override; size_t readBigAt(char * to, size_t n, size_t offset, const std::function &) const override; bool supportsReadAt() override { return true; } diff --git a/src/IO/ParallelReadBuffer.cpp b/src/IO/ParallelReadBuffer.cpp index e6771235a8e..89cff670e37 100644 --- a/src/IO/ParallelReadBuffer.cpp +++ b/src/IO/ParallelReadBuffer.cpp @@ -152,7 +152,7 @@ off_t ParallelReadBuffer::seek(off_t offset, int whence) return offset; } -size_t ParallelReadBuffer::getFileSize() +std::optional ParallelReadBuffer::tryGetFileSize() { return file_size; } diff --git a/src/IO/ParallelReadBuffer.h b/src/IO/ParallelReadBuffer.h index cfeec2b3677..8852472a8bc 100644 --- a/src/IO/ParallelReadBuffer.h +++ b/src/IO/ParallelReadBuffer.h @@ -33,7 +33,7 @@ public: ~ParallelReadBuffer() override { finishAndWait(); } off_t seek(off_t off, int whence) override; - size_t getFileSize() override; + std::optional tryGetFileSize() override; off_t getPosition() override; const SeekableReadBuffer & getReadBuffer() const { return input; } diff --git a/src/IO/ReadBufferFromEmptyFile.h b/src/IO/ReadBufferFromEmptyFile.h index f21f2f507dc..b15299dafee 100644 --- a/src/IO/ReadBufferFromEmptyFile.h +++ b/src/IO/ReadBufferFromEmptyFile.h @@ -19,7 +19,7 @@ private: std::string getFileName() const override { return ""; } off_t seek(off_t /*off*/, int /*whence*/) override { return 0; } off_t getPosition() override { return 0; } - size_t getFileSize() override { return 0; } + std::optional tryGetFileSize() override { return 0; } }; } diff --git a/src/IO/ReadBufferFromEncryptedFile.h b/src/IO/ReadBufferFromEncryptedFile.h index 3626daccb3e..213d242bb91 100644 --- a/src/IO/ReadBufferFromEncryptedFile.h +++ b/src/IO/ReadBufferFromEncryptedFile.h @@ -30,7 +30,7 @@ public: void setReadUntilEnd() override { in->setReadUntilEnd(); } - size_t getFileSize() override { return in->getFileSize(); } + std::optional tryGetFileSize() override { return in->tryGetFileSize(); } private: bool nextImpl() override; diff --git a/src/IO/ReadBufferFromFileBase.cpp 
b/src/IO/ReadBufferFromFileBase.cpp index 4ac3f984f78..d42b12ba49b 100644 --- a/src/IO/ReadBufferFromFileBase.cpp +++ b/src/IO/ReadBufferFromFileBase.cpp @@ -26,11 +26,9 @@ ReadBufferFromFileBase::ReadBufferFromFileBase( ReadBufferFromFileBase::~ReadBufferFromFileBase() = default; -size_t ReadBufferFromFileBase::getFileSize() +std::optional ReadBufferFromFileBase::tryGetFileSize() { - if (file_size) - return *file_size; - throw Exception(ErrorCodes::UNKNOWN_FILE_SIZE, "Cannot find out file size for read buffer"); + return file_size; } void ReadBufferFromFileBase::setProgressCallback(ContextPtr context) diff --git a/src/IO/ReadBufferFromFileBase.h b/src/IO/ReadBufferFromFileBase.h index 9870d8bbe43..c98dcd5a93e 100644 --- a/src/IO/ReadBufferFromFileBase.h +++ b/src/IO/ReadBufferFromFileBase.h @@ -50,7 +50,7 @@ public: clock_type = clock_type_; } - size_t getFileSize() override; + std::optional tryGetFileSize() override; void setProgressCallback(ContextPtr context); diff --git a/src/IO/ReadBufferFromFileDecorator.cpp b/src/IO/ReadBufferFromFileDecorator.cpp index 9ac0fb4e475..8a6468b9bd0 100644 --- a/src/IO/ReadBufferFromFileDecorator.cpp +++ b/src/IO/ReadBufferFromFileDecorator.cpp @@ -52,9 +52,9 @@ bool ReadBufferFromFileDecorator::nextImpl() return result; } -size_t ReadBufferFromFileDecorator::getFileSize() +std::optional ReadBufferFromFileDecorator::tryGetFileSize() { - return getFileSizeFromReadBuffer(*impl); + return tryGetFileSizeFromReadBuffer(*impl); } } diff --git a/src/IO/ReadBufferFromFileDecorator.h b/src/IO/ReadBufferFromFileDecorator.h index 6e62c7f741b..69f029c5cf7 100644 --- a/src/IO/ReadBufferFromFileDecorator.h +++ b/src/IO/ReadBufferFromFileDecorator.h @@ -27,7 +27,7 @@ public: ReadBuffer & getWrappedReadBuffer() { return *impl; } - size_t getFileSize() override; + std::optional tryGetFileSize() override; protected: std::unique_ptr impl; diff --git a/src/IO/ReadBufferFromFileDescriptor.cpp b/src/IO/ReadBufferFromFileDescriptor.cpp index 76a80f145e7..51a1a5d8d93 100644 --- a/src/IO/ReadBufferFromFileDescriptor.cpp +++ b/src/IO/ReadBufferFromFileDescriptor.cpp @@ -253,7 +253,7 @@ void ReadBufferFromFileDescriptor::rewind() file_offset_of_buffer_end = 0; } -size_t ReadBufferFromFileDescriptor::getFileSize() +std::optional ReadBufferFromFileDescriptor::tryGetFileSize() { return getSizeFromFileDescriptor(fd, getFileName()); } diff --git a/src/IO/ReadBufferFromFileDescriptor.h b/src/IO/ReadBufferFromFileDescriptor.h index db256ef91c7..6083e744c95 100644 --- a/src/IO/ReadBufferFromFileDescriptor.h +++ b/src/IO/ReadBufferFromFileDescriptor.h @@ -69,7 +69,7 @@ public: /// Seek to the beginning, discarding already read data if any. Useful to reread file that changes on every read. 
void rewind(); - size_t getFileSize() override; + std::optional tryGetFileSize() override; bool checkIfActuallySeekable() override; diff --git a/src/IO/ReadBufferFromS3.cpp b/src/IO/ReadBufferFromS3.cpp index 9e001232e65..94f317802e3 100644 --- a/src/IO/ReadBufferFromS3.cpp +++ b/src/IO/ReadBufferFromS3.cpp @@ -313,15 +313,15 @@ off_t ReadBufferFromS3::seek(off_t offset_, int whence) return offset; } -size_t ReadBufferFromS3::getFileSize() +std::optional ReadBufferFromS3::tryGetFileSize() { if (file_size) - return *file_size; + return file_size; auto object_size = S3::getObjectSize(*client_ptr, bucket, key, version_id); file_size = object_size; - return *file_size; + return file_size; } off_t ReadBufferFromS3::getPosition() diff --git a/src/IO/ReadBufferFromS3.h b/src/IO/ReadBufferFromS3.h index c6625c2d632..ff04f78ce7b 100644 --- a/src/IO/ReadBufferFromS3.h +++ b/src/IO/ReadBufferFromS3.h @@ -63,7 +63,7 @@ public: off_t getPosition() override; - size_t getFileSize() override; + std::optional tryGetFileSize() override; void setReadUntilPosition(size_t position) override; void setReadUntilEnd() override; diff --git a/src/IO/ReadWriteBufferFromHTTP.cpp b/src/IO/ReadWriteBufferFromHTTP.cpp index b753e66da48..2a62b11aa44 100644 --- a/src/IO/ReadWriteBufferFromHTTP.cpp +++ b/src/IO/ReadWriteBufferFromHTTP.cpp @@ -121,15 +121,12 @@ void ReadWriteBufferFromHTTP::prepareRequest(Poco::Net::HTTPRequest & request, s credentials.authenticate(request); } -size_t ReadWriteBufferFromHTTP::getFileSize() +std::optional ReadWriteBufferFromHTTP::tryGetFileSize() { if (!file_info) file_info = getFileInfo(); - if (file_info->file_size) - return *file_info->file_size; - - throw Exception(ErrorCodes::UNKNOWN_FILE_SIZE, "Cannot find out file size for: {}", initial_uri.toString()); + return file_info->file_size; } bool ReadWriteBufferFromHTTP::supportsReadAt() diff --git a/src/IO/ReadWriteBufferFromHTTP.h b/src/IO/ReadWriteBufferFromHTTP.h index f496fe3ddcd..1c9bda53008 100644 --- a/src/IO/ReadWriteBufferFromHTTP.h +++ b/src/IO/ReadWriteBufferFromHTTP.h @@ -118,7 +118,7 @@ private: std::unique_ptr initialize(); - size_t getFileSize() override; + std::optional tryGetFileSize() override; bool supportsReadAt() override; diff --git a/src/IO/WithFileSize.cpp b/src/IO/WithFileSize.cpp index 8cea12fa200..cbbcab83de2 100644 --- a/src/IO/WithFileSize.cpp +++ b/src/IO/WithFileSize.cpp @@ -13,11 +13,19 @@ namespace ErrorCodes extern const int UNKNOWN_FILE_SIZE; } +size_t WithFileSize::getFileSize() +{ + if (auto maybe_size = tryGetFileSize()) + return *maybe_size; + + throw Exception(ErrorCodes::UNKNOWN_FILE_SIZE, "Cannot find out file size"); +} + template static std::optional tryGetFileSize(T & in) { if (auto * with_file_size = dynamic_cast(&in)) - return with_file_size->getFileSize(); + return with_file_size->tryGetFileSize(); return std::nullopt; } diff --git a/src/IO/WithFileSize.h b/src/IO/WithFileSize.h index 0ae3af98ea0..e5dc383fab0 100644 --- a/src/IO/WithFileSize.h +++ b/src/IO/WithFileSize.h @@ -10,15 +10,16 @@ class ReadBuffer; class WithFileSize { public: - virtual size_t getFileSize() = 0; + /// Returns nullopt if couldn't find out file size; + virtual std::optional tryGetFileSize() = 0; virtual ~WithFileSize() = default; + + size_t getFileSize(); }; bool isBufferWithFileSize(const ReadBuffer & in); size_t getFileSizeFromReadBuffer(ReadBuffer & in); - -/// Return nullopt if couldn't find out file size; std::optional tryGetFileSizeFromReadBuffer(ReadBuffer & in); size_t getDataOffsetMaybeCompressed(const 
ReadBuffer & in); diff --git a/src/Storages/Cache/ExternalDataSourceCache.h b/src/Storages/Cache/ExternalDataSourceCache.h index 4c8c7974005..3b4eff28307 100644 --- a/src/Storages/Cache/ExternalDataSourceCache.h +++ b/src/Storages/Cache/ExternalDataSourceCache.h @@ -53,7 +53,7 @@ public: bool nextImpl() override; off_t seek(off_t off, int whence) override; off_t getPosition() override; - size_t getFileSize() override { return remote_file_size; } + std::optional tryGetFileSize() override { return remote_file_size; } private: std::unique_ptr local_file_holder; diff --git a/src/Storages/ObjectStorage/HDFS/AsynchronousReadBufferFromHDFS.cpp b/src/Storages/ObjectStorage/HDFS/AsynchronousReadBufferFromHDFS.cpp index 21df7e35284..3bbc4e8a2ea 100644 --- a/src/Storages/ObjectStorage/HDFS/AsynchronousReadBufferFromHDFS.cpp +++ b/src/Storages/ObjectStorage/HDFS/AsynchronousReadBufferFromHDFS.cpp @@ -91,9 +91,9 @@ void AsynchronousReadBufferFromHDFS::prefetch(Priority priority) } -size_t AsynchronousReadBufferFromHDFS::getFileSize() +std::optional AsynchronousReadBufferFromHDFS::tryGetFileSize() { - return impl->getFileSize(); + return impl->tryGetFileSize(); } String AsynchronousReadBufferFromHDFS::getFileName() const diff --git a/src/Storages/ObjectStorage/HDFS/AsynchronousReadBufferFromHDFS.h b/src/Storages/ObjectStorage/HDFS/AsynchronousReadBufferFromHDFS.h index 5aef92315a4..9846d74453b 100644 --- a/src/Storages/ObjectStorage/HDFS/AsynchronousReadBufferFromHDFS.h +++ b/src/Storages/ObjectStorage/HDFS/AsynchronousReadBufferFromHDFS.h @@ -35,7 +35,7 @@ public: void prefetch(Priority priority) override; - size_t getFileSize() override; + std::optional tryGetFileSize() override; String getFileName() const override; diff --git a/src/Storages/ObjectStorage/HDFS/ReadBufferFromHDFS.cpp b/src/Storages/ObjectStorage/HDFS/ReadBufferFromHDFS.cpp index be339d021dc..bf6f9db722c 100644 --- a/src/Storages/ObjectStorage/HDFS/ReadBufferFromHDFS.cpp +++ b/src/Storages/ObjectStorage/HDFS/ReadBufferFromHDFS.cpp @@ -31,7 +31,7 @@ namespace ErrorCodes } -struct ReadBufferFromHDFS::ReadBufferFromHDFSImpl : public BufferWithOwnMemory +struct ReadBufferFromHDFS::ReadBufferFromHDFSImpl : public BufferWithOwnMemory, public WithFileSize { String hdfs_uri; String hdfs_file_path; @@ -90,7 +90,7 @@ struct ReadBufferFromHDFS::ReadBufferFromHDFSImpl : public BufferWithOwnMemory tryGetFileSize() override { return file_size; } @@ -191,9 +191,9 @@ ReadBufferFromHDFS::ReadBufferFromHDFS( ReadBufferFromHDFS::~ReadBufferFromHDFS() = default; -size_t ReadBufferFromHDFS::getFileSize() +std::optional ReadBufferFromHDFS::tryGetFileSize() { - return impl->getFileSize(); + return impl->tryGetFileSize(); } bool ReadBufferFromHDFS::nextImpl() diff --git a/src/Storages/ObjectStorage/HDFS/ReadBufferFromHDFS.h b/src/Storages/ObjectStorage/HDFS/ReadBufferFromHDFS.h index d9671e7e445..5363f07967b 100644 --- a/src/Storages/ObjectStorage/HDFS/ReadBufferFromHDFS.h +++ b/src/Storages/ObjectStorage/HDFS/ReadBufferFromHDFS.h @@ -40,7 +40,7 @@ public: off_t getPosition() override; - size_t getFileSize() override; + std::optional tryGetFileSize() override; size_t getFileOffsetOfBufferEnd() const override; From b8fbfd227fb60e0f244bda716ef5a9bb89376986 Mon Sep 17 00:00:00 2001 From: Xu Jia Date: Thu, 18 Jul 2024 15:41:08 +0800 Subject: [PATCH 066/661] format --- src/Interpreters/InterpreterInsertQuery.cpp | 1 - .../test.py | 40 ++++++++----------- 2 files changed, 17 insertions(+), 24 deletions(-) diff --git a/src/Interpreters/InterpreterInsertQuery.cpp 
b/src/Interpreters/InterpreterInsertQuery.cpp index 4064cd82b67..181fb064b54 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -49,7 +49,6 @@ namespace ProfileEvents { extern const Event InsertQueriesWithSubqueries; extern const Event QueriesWithSubqueries; - extern const int QUERY_IS_PROHIBITED; } namespace DB diff --git a/tests/integration/test_disable_insertion_and_mutation/test.py b/tests/integration/test_disable_insertion_and_mutation/test.py index f098f130d2b..f25964d27b8 100644 --- a/tests/integration/test_disable_insertion_and_mutation/test.py +++ b/tests/integration/test_disable_insertion_and_mutation/test.py @@ -37,29 +37,25 @@ def started_cluster(): def test_disable_insertion_and_mutation(started_cluster): writing_node.query("""CREATE TABLE my_table on cluster default (key UInt64, value String) ENGINE=ReplicatedMergeTree('/clickhouse/tables/{shard}/default.my_table', '{replica}') ORDER BY key partition by (key % 5) """) - assert ( - "QUERY_IS_PROHIBITED" - in reading_node.query_and_get_error("INSERT INTO my_table VALUES (1, 'hello')") + assert "QUERY_IS_PROHIBITED" in reading_node.query_and_get_error( + "INSERT INTO my_table VALUES (1, 'hello')" ) - assert ( - "QUERY_IS_PROHIBITED" - in reading_node.query_and_get_error("INSERT INTO my_table SETTINGS async_insert = 1 VALUES (1, 'hello')") + assert "QUERY_IS_PROHIBITED" in reading_node.query_and_get_error( + "INSERT INTO my_table SETTINGS async_insert = 1 VALUES (1, 'hello')" ) - assert ( - "QUERY_IS_PROHIBITED" - in reading_node.query_and_get_error("ALTER TABLE my_table delete where 1") + assert "QUERY_IS_PROHIBITED" in reading_node.query_and_get_error( + "ALTER TABLE my_table delete where 1" + ) + + + assert "QUERY_IS_PROHIBITED" in reading_node.query_and_get_error( + "ALTER table my_table update key = 1 where 1" ) - assert ( - "QUERY_IS_PROHIBITED" - in reading_node.query_and_get_error("ALTER table my_table update key = 1 where 1") - ) - - assert ( - "QUERY_IS_PROHIBITED" - in reading_node.query_and_get_error("ALTER TABLE my_table drop partition 0") + assert "QUERY_IS_PROHIBITED" in reading_node.query_and_get_error( + "ALTER TABLE my_table drop partition 0" ) reading_node.query("SELECT * from my_table"); @@ -73,12 +69,10 @@ def test_disable_insertion_and_mutation(started_cluster): reading_node.query("ALter Table my_table MODIFY COLUMN new_column String") - assert( - "new_column\tString" - in reading_node.query("DESC my_table") + assert "new_column\tString" in reading_node.query( + "DESC my_table" ) - assert( - "new_column\tString" - in writing_node.query("DESC my_table") + assert "new_column\tString" in writing_node.query( + "DESC my_table" ) From 2e1f679ceb05afe4d5d813eb4048555c6311c3e1 Mon Sep 17 00:00:00 2001 From: Michael Stetsyuk Date: Tue, 16 Jul 2024 18:35:33 +0200 Subject: [PATCH 067/661] add S3DiskNoKeyErrors metric --- src/Common/CurrentMetrics.cpp | 2 ++ src/IO/S3/Client.cpp | 21 +++++++++++++++------ 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/src/Common/CurrentMetrics.cpp b/src/Common/CurrentMetrics.cpp index 7c97e73f278..2fedba0175b 100644 --- a/src/Common/CurrentMetrics.cpp +++ b/src/Common/CurrentMetrics.cpp @@ -306,6 +306,8 @@ \ M(FilteringMarksWithPrimaryKey, "Number of threads currently doing filtering of mark ranges by the primary key") \ M(FilteringMarksWithSecondaryKeys, "Number of threads currently doing filtering of mark ranges by secondary keys") \ + \ + M(S3DiskNoKeyErrors, "Number of no-key S3 disk errors") \ #ifdef 
APPLY_FOR_EXTERNAL_METRICS #define APPLY_FOR_METRICS(M) APPLY_FOR_BUILTIN_METRICS(M) APPLY_FOR_EXTERNAL_METRICS(M) diff --git a/src/IO/S3/Client.cpp b/src/IO/S3/Client.cpp index 3b958dea046..db20420db9f 100644 --- a/src/IO/S3/Client.cpp +++ b/src/IO/S3/Client.cpp @@ -24,6 +24,7 @@ #include #include +#include #include #include @@ -43,6 +44,11 @@ namespace ProfileEvents extern const Event TinyS3Clients; } +namespace CurrentMetrics +{ + extern const Metric S3DiskNoKeyErrors; +} + namespace DB { @@ -379,10 +385,10 @@ Model::HeadObjectOutcome Client::HeadObject(HeadObjectRequest & request) const request.overrideURI(std::move(*bucket_uri)); - /// The next call is NOT a recurcive call - /// This is a virtuall call Aws::S3::S3Client::HeadObject(const Model::HeadObjectRequest&) - return enrichErrorMessage( - HeadObject(static_cast(request))); + if (isClientForDisk()) + CurrentMetrics::add(CurrentMetrics::S3DiskNoKeyErrors); + + return enrichErrorMessage(std::move(result)); } /// For each request, we wrap the request functions from Aws::S3::Client with doRequest @@ -402,8 +408,11 @@ Model::ListObjectsOutcome Client::ListObjects(ListObjectsRequest & request) cons Model::GetObjectOutcome Client::GetObject(GetObjectRequest & request) const { - return enrichErrorMessage( - doRequest(request, [this](const Model::GetObjectRequest & req) { return GetObject(req); })); + auto resp = doRequest(request, [this](const Model::GetObjectRequest & req) { return GetObject(req); }); + if (!resp.IsSuccess() && isClientForDisk()) + CurrentMetrics::add(CurrentMetrics::S3DiskNoKeyErrors); + + return enrichErrorMessage(std::move(resp)); } Model::AbortMultipartUploadOutcome Client::AbortMultipartUpload(AbortMultipartUploadRequest & request) const From 884dc496a0a978074d3e0bd70f4df8d0225e69c1 Mon Sep 17 00:00:00 2001 From: Xu Jia Date: Thu, 18 Jul 2024 20:58:37 +0800 Subject: [PATCH 068/661] format --- .../test_disable_insertion_and_mutation/test.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/tests/integration/test_disable_insertion_and_mutation/test.py b/tests/integration/test_disable_insertion_and_mutation/test.py index f25964d27b8..b6431690245 100644 --- a/tests/integration/test_disable_insertion_and_mutation/test.py +++ b/tests/integration/test_disable_insertion_and_mutation/test.py @@ -35,7 +35,9 @@ def started_cluster(): def test_disable_insertion_and_mutation(started_cluster): - writing_node.query("""CREATE TABLE my_table on cluster default (key UInt64, value String) ENGINE=ReplicatedMergeTree('/clickhouse/tables/{shard}/default.my_table', '{replica}') ORDER BY key partition by (key % 5) """) + writing_node.query( + """CREATE TABLE my_table on cluster default (key UInt64, value String) ENGINE=ReplicatedMergeTree('/clickhouse/tables/{shard}/default.my_table', '{replica}') ORDER BY key partition by (key % 5) """ + ) assert "QUERY_IS_PROHIBITED" in reading_node.query_and_get_error( "INSERT INTO my_table VALUES (1, 'hello')" @@ -58,7 +60,7 @@ def test_disable_insertion_and_mutation(started_cluster): "ALTER TABLE my_table drop partition 0" ) - reading_node.query("SELECT * from my_table"); + reading_node.query("SELECT * from my_table") writing_node.query("INSERT INTO my_table VALUES (1, 'hello')") writing_node.query("ALTER TABLE my_table delete where 1") writing_node.query("ALTER table my_table update value = 'no hello' where 1") @@ -69,10 +71,6 @@ def test_disable_insertion_and_mutation(started_cluster): reading_node.query("ALter Table my_table MODIFY COLUMN new_column String") - assert 
"new_column\tString" in reading_node.query( - "DESC my_table" - ) + assert "new_column\tString" in reading_node.query("DESC my_table") - assert "new_column\tString" in writing_node.query( - "DESC my_table" - ) + assert "new_column\tString" in writing_node.query("DESC my_table") From c01e2cbeea02ebecfc4dea4692baffff3087b043 Mon Sep 17 00:00:00 2001 From: Xu Jia Date: Thu, 18 Jul 2024 22:58:30 +0800 Subject: [PATCH 069/661] format --- tests/integration/test_disable_insertion_and_mutation/test.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/integration/test_disable_insertion_and_mutation/test.py b/tests/integration/test_disable_insertion_and_mutation/test.py index b6431690245..b6da7ed548f 100644 --- a/tests/integration/test_disable_insertion_and_mutation/test.py +++ b/tests/integration/test_disable_insertion_and_mutation/test.py @@ -50,7 +50,6 @@ def test_disable_insertion_and_mutation(started_cluster): assert "QUERY_IS_PROHIBITED" in reading_node.query_and_get_error( "ALTER TABLE my_table delete where 1" ) - assert "QUERY_IS_PROHIBITED" in reading_node.query_and_get_error( "ALTER table my_table update key = 1 where 1" From cda846339be22c66cd0d35d49273a314fa3bdf69 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 18 Jul 2024 15:12:32 +0000 Subject: [PATCH 070/661] Remove ActionsDAG::clone --- src/Interpreters/ActionsDAG.cpp | 8 ---- src/Interpreters/ActionsDAG.h | 3 -- src/Interpreters/ExpressionAnalyzer.cpp | 9 +---- src/Interpreters/InterpreterSelectQuery.cpp | 24 ++++++------ src/Interpreters/MutationsInterpreter.cpp | 4 +- src/Planner/Planner.cpp | 38 +++++++++---------- src/Planner/PlannerExpressionAnalysis.h | 16 ++++---- src/Planner/PlannerJoins.cpp | 4 +- src/Processors/QueryPlan/ExpressionStep.cpp | 4 +- src/Processors/QueryPlan/FilterStep.cpp | 4 +- .../Optimizations/distinctReadInOrder.cpp | 10 ++--- .../Optimizations/filterPushDown.cpp | 4 +- .../QueryPlan/Optimizations/liftUpUnion.cpp | 2 +- .../optimizePrimaryKeyConditionAndLimit.cpp | 6 +-- .../Optimizations/optimizeReadInOrder.cpp | 28 +++++++------- .../optimizeUseAggregateProjection.cpp | 4 +- .../Optimizations/projectionsCommon.cpp | 7 ++-- .../Optimizations/removeRedundantDistinct.cpp | 20 +++++----- .../QueryPlan/ReadFromMergeTree.cpp | 16 ++++---- .../QueryPlan/SourceStepWithFilter.cpp | 8 ++-- src/Processors/QueryPlan/TotalsHavingStep.cpp | 22 +++++++---- .../Transforms/FillingTransform.cpp | 2 +- src/Storages/MergeTree/MergeTreeIndexSet.cpp | 4 +- .../MergeTree/MergeTreeSelectProcessor.cpp | 4 +- src/Storages/SelectQueryInfo.h | 4 +- src/Storages/StorageBuffer.cpp | 10 +++-- src/Storages/StorageMerge.cpp | 8 ++-- src/Storages/VirtualColumnUtils.cpp | 2 +- 28 files changed, 135 insertions(+), 140 deletions(-) diff --git a/src/Interpreters/ActionsDAG.cpp b/src/Interpreters/ActionsDAG.cpp index e001406408f..53e04f24829 100644 --- a/src/Interpreters/ActionsDAG.cpp +++ b/src/Interpreters/ActionsDAG.cpp @@ -1246,14 +1246,6 @@ bool ActionsDAG::removeUnusedResult(const std::string & column_name) return true; } -ActionsDAGPtr ActionsDAG::clone(const ActionsDAG * from) -{ - std::unordered_map old_to_new_nodes; - if (from == nullptr) - return nullptr; - return std::make_unique(from->clone(old_to_new_nodes)); -} - ActionsDAG ActionsDAG::clone() const { std::unordered_map old_to_new_nodes; diff --git a/src/Interpreters/ActionsDAG.h b/src/Interpreters/ActionsDAG.h index 6f5c3d3b0df..6f6c3f9bccb 100644 --- a/src/Interpreters/ActionsDAG.h +++ b/src/Interpreters/ActionsDAG.h @@ -261,8 +261,6 @@ public: void 
compileExpressions(size_t min_count_to_compile_expression, const std::unordered_set & lazy_executed_nodes = {}); #endif - static ActionsDAGPtr clone(const ActionsDAG * from); - ActionsDAG clone(std::unordered_map & old_to_new_nodes) const; ActionsDAG clone() const; @@ -491,7 +489,6 @@ public: const ActionsDAG::Node * find(const String & output_name); private: - //const ActionsDAG & actions; NameToNodeIndex index; }; diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index 0f350602777..6b5b129085d 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -1944,10 +1944,7 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( Block before_prewhere_sample = source_header; if (sanitizeBlock(before_prewhere_sample)) { - ActionsDAG dag = std::move(*ActionsDAG::clone(&prewhere_dag_and_flags->dag)); - ExpressionActions( - std::move(dag), - ExpressionActionsSettings::fromSettings(context->getSettingsRef())).execute(before_prewhere_sample); + prewhere_dag_and_flags->dag.updateHeader(before_prewhere_sample); auto & column_elem = before_prewhere_sample.getByName(query.prewhere()->getColumnName()); /// If the filter column is a constant, record it. if (column_elem.column) @@ -1979,9 +1976,7 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( before_where_sample = source_header; if (sanitizeBlock(before_where_sample)) { - ExpressionActions( - std::move(*ActionsDAG::clone(&before_where->dag)), - ExpressionActionsSettings::fromSettings(context->getSettingsRef())).execute(before_where_sample); + before_where->dag.updateHeader(before_where_sample); auto & column_elem = before_where_sample.getByName(query.where()->getColumnName()); diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index c85eb8310dc..e0073a6af5d 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -1501,7 +1501,7 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional

( query_plan.getCurrentDataStream(), - std::move(*ActionsDAG::clone(&expressions.filter_info->actions)), + expressions.filter_info->actions.clone(), expressions.filter_info->column_name, expressions.filter_info->do_remove_column); @@ -1515,7 +1515,7 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional

( query_plan.getCurrentDataStream(), - std::move(*ActionsDAG::clone(&*expressions.prewhere_info->row_level_filter)), + expressions.prewhere_info->row_level_filter->clone(), expressions.prewhere_info->row_level_column_name, true); @@ -1525,7 +1525,7 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional

( query_plan.getCurrentDataStream(), - std::move(*ActionsDAG::clone(&*expressions.prewhere_info->prewhere_actions)), + expressions.prewhere_info->prewhere_actions->clone(), expressions.prewhere_info->prewhere_column_name, expressions.prewhere_info->remove_prewhere_column); @@ -1627,7 +1627,7 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional

( query_plan.getCurrentDataStream(), - std::move(*ActionsDAG::clone(&expressions.filter_info->actions)), + expressions.filter_info->actions.clone(), expressions.filter_info->column_name, expressions.filter_info->do_remove_column); @@ -2056,20 +2056,22 @@ void InterpreterSelectQuery::addEmptySourceToQueryPlan(QueryPlan & query_plan, c { auto & prewhere_info = *query_info.prewhere_info; + auto row_level_actions = std::make_shared(prewhere_info.row_level_filter->clone()); if (prewhere_info.row_level_filter) { pipe.addSimpleTransform([&](const Block & header) { return std::make_shared(header, - std::make_shared(std::move(*ActionsDAG::clone(&*prewhere_info.row_level_filter))), + row_level_actions, prewhere_info.row_level_column_name, true); }); } + auto filter_actions = std::make_shared(prewhere_info.prewhere_actions->clone()); pipe.addSimpleTransform([&](const Block & header) { return std::make_shared( - header, std::make_shared(std::move(*ActionsDAG::clone(&*prewhere_info.prewhere_actions))), + header, filter_actions, prewhere_info.prewhere_column_name, prewhere_info.remove_prewhere_column); }); } @@ -2589,7 +2591,7 @@ void InterpreterSelectQuery::executeFetchColumns(QueryProcessingStage::Enum proc /// Aliases in table declaration. if (processing_stage == QueryProcessingStage::FetchColumns && alias_actions) { - auto table_aliases = std::make_unique(query_plan.getCurrentDataStream(), std::move(*ActionsDAG::clone(&*alias_actions))); + auto table_aliases = std::make_unique(query_plan.getCurrentDataStream(), alias_actions->clone()); table_aliases->setStepDescription("Add table aliases"); query_plan.addStep(std::move(table_aliases)); } @@ -2597,7 +2599,7 @@ void InterpreterSelectQuery::executeFetchColumns(QueryProcessingStage::Enum proc void InterpreterSelectQuery::executeWhere(QueryPlan & query_plan, const ActionsAndProjectInputsFlagPtr & expression, bool remove_filter) { - auto dag = std::move(*ActionsDAG::clone(&expression->dag)); + auto dag = expression->dag.clone(); if (expression->project_input) dag.appendInputsForUnusedColumns(query_plan.getCurrentDataStream().header); @@ -2771,7 +2773,7 @@ void InterpreterSelectQuery::executeMergeAggregated(QueryPlan & query_plan, bool void InterpreterSelectQuery::executeHaving(QueryPlan & query_plan, const ActionsAndProjectInputsFlagPtr & expression, bool remove_filter) { - auto dag = std::move(*ActionsDAG::clone(&expression->dag)); + auto dag = expression->dag.clone(); if (expression->project_input) dag.appendInputsForUnusedColumns(query_plan.getCurrentDataStream().header); @@ -2789,7 +2791,7 @@ void InterpreterSelectQuery::executeTotalsAndHaving( std::optional dag; if (expression) { - dag = std::move(*ActionsDAG::clone(&expression->dag)); + dag = expression->dag.clone(); if (expression->project_input) dag->appendInputsForUnusedColumns(query_plan.getCurrentDataStream().header); } @@ -2838,7 +2840,7 @@ void InterpreterSelectQuery::executeExpression(QueryPlan & query_plan, const Act if (!expression) return; - ActionsDAG dag = std::move(*ActionsDAG::clone(&expression->dag)); + auto dag = expression->dag.clone(); if (expression->project_input) dag.appendInputsForUnusedColumns(query_plan.getCurrentDataStream().header); diff --git a/src/Interpreters/MutationsInterpreter.cpp b/src/Interpreters/MutationsInterpreter.cpp index 5b710149d85..57ad5caa4c7 100644 --- a/src/Interpreters/MutationsInterpreter.cpp +++ b/src/Interpreters/MutationsInterpreter.cpp @@ -1281,7 +1281,7 @@ QueryPipelineBuilder MutationsInterpreter::addStreamsForLaterStages(const std::v if (i < 
stage.filter_column_names.size()) { - auto dag = std::move(*ActionsDAG::clone(&step->actions()->dag)); + auto dag = step->actions()->dag.clone(); if (step->actions()->project_input) dag.appendInputsForUnusedColumns(plan.getCurrentDataStream().header); /// Execute DELETEs. @@ -1289,7 +1289,7 @@ QueryPipelineBuilder MutationsInterpreter::addStreamsForLaterStages(const std::v } else { - auto dag = std::move(*ActionsDAG::clone(&step->actions()->dag)); + auto dag = step->actions()->dag.clone(); if (step->actions()->project_input) dag.appendInputsForUnusedColumns(plan.getCurrentDataStream().header); /// Execute UPDATE or final projection. diff --git a/src/Planner/Planner.cpp b/src/Planner/Planner.cpp index e087c3691b4..fb721069e6e 100644 --- a/src/Planner/Planner.cpp +++ b/src/Planner/Planner.cpp @@ -333,11 +333,11 @@ public: }; void addExpressionStep(QueryPlan & query_plan, - const ActionsAndProjectInputsFlagPtr & expression_actions, + ActionsAndProjectInputsFlagPtr & expression_actions, const std::string & step_description, UsefulSets & useful_sets) { - auto actions = std::move(*ActionsDAG::clone(&expression_actions->dag)); + auto actions = std::move(expression_actions->dag); if (expression_actions->project_input) actions.appendInputsForUnusedColumns(query_plan.getCurrentDataStream().header); @@ -348,11 +348,11 @@ void addExpressionStep(QueryPlan & query_plan, } void addFilterStep(QueryPlan & query_plan, - const FilterAnalysisResult & filter_analysis_result, + FilterAnalysisResult & filter_analysis_result, const std::string & step_description, UsefulSets & useful_sets) { - auto actions = std::move(*ActionsDAG::clone(&filter_analysis_result.filter_actions->dag)); + auto actions = std::move(filter_analysis_result.filter_actions->dag); if (filter_analysis_result.filter_actions->project_input) actions.appendInputsForUnusedColumns(query_plan.getCurrentDataStream().header); @@ -544,7 +544,7 @@ void addMergingAggregatedStep(QueryPlan & query_plan, } void addTotalsHavingStep(QueryPlan & query_plan, - const PlannerExpressionsAnalysisResult & expression_analysis_result, + PlannerExpressionsAnalysisResult & expression_analysis_result, const QueryAnalysisResult & query_analysis_result, const PlannerContextPtr & planner_context, const QueryNode & query_node, @@ -553,14 +553,14 @@ void addTotalsHavingStep(QueryPlan & query_plan, const auto & query_context = planner_context->getQueryContext(); const auto & settings = query_context->getSettingsRef(); - const auto & aggregation_analysis_result = expression_analysis_result.getAggregation(); - const auto & having_analysis_result = expression_analysis_result.getHaving(); + auto & aggregation_analysis_result = expression_analysis_result.getAggregation(); + auto & having_analysis_result = expression_analysis_result.getHaving(); bool need_finalize = !query_node.isGroupByWithRollup() && !query_node.isGroupByWithCube(); std::optional actions; if (having_analysis_result.filter_actions) { - actions = std::move(*ActionsDAG::clone(&having_analysis_result.filter_actions->dag)); + actions = std::move(having_analysis_result.filter_actions->dag); if (having_analysis_result.filter_actions->project_input) actions->appendInputsForUnusedColumns(query_plan.getCurrentDataStream().header); } @@ -886,7 +886,7 @@ bool addPreliminaryLimitOptimizationStepIfNeeded(QueryPlan & query_plan, * WINDOW functions. 
*/ void addPreliminarySortOrDistinctOrLimitStepsIfNeeded(QueryPlan & query_plan, - const PlannerExpressionsAnalysisResult & expressions_analysis_result, + PlannerExpressionsAnalysisResult & expressions_analysis_result, const QueryAnalysisResult & query_analysis_result, const PlannerContextPtr & planner_context, const PlannerQueryProcessingInfo & query_processing_info, @@ -922,7 +922,7 @@ void addPreliminarySortOrDistinctOrLimitStepsIfNeeded(QueryPlan & query_plan, if (expressions_analysis_result.hasLimitBy()) { - const auto & limit_by_analysis_result = expressions_analysis_result.getLimitBy(); + auto & limit_by_analysis_result = expressions_analysis_result.getLimitBy(); addExpressionStep(query_plan, limit_by_analysis_result.before_limit_by_actions, "Before LIMIT BY", useful_sets); addLimitByStep(query_plan, limit_by_analysis_result, query_node); } @@ -1549,7 +1549,7 @@ void Planner::buildPlanForQueryNode() if (expression_analysis_result.hasAggregation()) { - const auto & aggregation_analysis_result = expression_analysis_result.getAggregation(); + auto & aggregation_analysis_result = expression_analysis_result.getAggregation(); if (aggregation_analysis_result.before_aggregation_actions) addExpressionStep(query_plan, aggregation_analysis_result.before_aggregation_actions, "Before GROUP BY", useful_sets); @@ -1568,7 +1568,7 @@ void Planner::buildPlanForQueryNode() * window functions, we can't execute ORDER BY and DISTINCT * now, on shard (first_stage). */ - const auto & window_analysis_result = expression_analysis_result.getWindow(); + auto & window_analysis_result = expression_analysis_result.getWindow(); if (window_analysis_result.before_window_actions) addExpressionStep(query_plan, window_analysis_result.before_window_actions, "Before WINDOW", useful_sets); } @@ -1578,7 +1578,7 @@ void Planner::buildPlanForQueryNode() * Projection expressions, preliminary DISTINCT and before ORDER BY expressions * now, on shards (first_stage). 
*/ - const auto & projection_analysis_result = expression_analysis_result.getProjection(); + auto & projection_analysis_result = expression_analysis_result.getProjection(); addExpressionStep(query_plan, projection_analysis_result.projection_actions, "Projection", useful_sets); if (query_node.isDistinct()) @@ -1594,7 +1594,7 @@ void Planner::buildPlanForQueryNode() if (expression_analysis_result.hasSort()) { - const auto & sort_analysis_result = expression_analysis_result.getSort(); + auto & sort_analysis_result = expression_analysis_result.getSort(); addExpressionStep(query_plan, sort_analysis_result.before_order_by_actions, "Before ORDER BY", useful_sets); } } @@ -1648,7 +1648,7 @@ void Planner::buildPlanForQueryNode() { if (expression_analysis_result.hasWindow()) { - const auto & window_analysis_result = expression_analysis_result.getWindow(); + auto & window_analysis_result = expression_analysis_result.getWindow(); if (expression_analysis_result.hasAggregation()) addExpressionStep(query_plan, window_analysis_result.before_window_actions, "Before window functions", useful_sets); @@ -1658,7 +1658,7 @@ void Planner::buildPlanForQueryNode() if (expression_analysis_result.hasQualify()) addFilterStep(query_plan, expression_analysis_result.getQualify(), "QUALIFY", useful_sets); - const auto & projection_analysis_result = expression_analysis_result.getProjection(); + auto & projection_analysis_result = expression_analysis_result.getProjection(); addExpressionStep(query_plan, projection_analysis_result.projection_actions, "Projection", useful_sets); if (query_node.isDistinct()) @@ -1674,7 +1674,7 @@ void Planner::buildPlanForQueryNode() if (expression_analysis_result.hasSort()) { - const auto & sort_analysis_result = expression_analysis_result.getSort(); + auto & sort_analysis_result = expression_analysis_result.getSort(); addExpressionStep(query_plan, sort_analysis_result.before_order_by_actions, "Before ORDER BY", useful_sets); } } @@ -1727,7 +1727,7 @@ void Planner::buildPlanForQueryNode() if (!query_processing_info.isFromAggregationState() && expression_analysis_result.hasLimitBy()) { - const auto & limit_by_analysis_result = expression_analysis_result.getLimitBy(); + auto & limit_by_analysis_result = expression_analysis_result.getLimitBy(); addExpressionStep(query_plan, limit_by_analysis_result.before_limit_by_actions, "Before LIMIT BY", useful_sets); addLimitByStep(query_plan, limit_by_analysis_result, query_node); } @@ -1759,7 +1759,7 @@ void Planner::buildPlanForQueryNode() /// Project names is not done on shards, because initiator will not find columns in blocks if (!query_processing_info.isToAggregationState()) { - const auto & projection_analysis_result = expression_analysis_result.getProjection(); + auto & projection_analysis_result = expression_analysis_result.getProjection(); addExpressionStep(query_plan, projection_analysis_result.project_names_actions, "Project names", useful_sets); } diff --git a/src/Planner/PlannerExpressionAnalysis.h b/src/Planner/PlannerExpressionAnalysis.h index 820df7131a7..283fcac7aba 100644 --- a/src/Planner/PlannerExpressionAnalysis.h +++ b/src/Planner/PlannerExpressionAnalysis.h @@ -64,7 +64,7 @@ public: : projection_analysis_result(std::move(projection_analysis_result_)) {} - const ProjectionAnalysisResult & getProjection() const + ProjectionAnalysisResult & getProjection() { return projection_analysis_result; } @@ -74,7 +74,7 @@ public: return where_analysis_result.filter_actions != nullptr; } - const FilterAnalysisResult & getWhere() const + 
FilterAnalysisResult & getWhere() { return where_analysis_result; } @@ -89,7 +89,7 @@ public: return !aggregation_analysis_result.aggregation_keys.empty() || !aggregation_analysis_result.aggregate_descriptions.empty(); } - const AggregationAnalysisResult & getAggregation() const + AggregationAnalysisResult & getAggregation() { return aggregation_analysis_result; } @@ -104,7 +104,7 @@ public: return having_analysis_result.filter_actions != nullptr; } - const FilterAnalysisResult & getHaving() const + FilterAnalysisResult & getHaving() { return having_analysis_result; } @@ -119,7 +119,7 @@ public: return !window_analysis_result.window_descriptions.empty(); } - const WindowAnalysisResult & getWindow() const + WindowAnalysisResult & getWindow() { return window_analysis_result; } @@ -134,7 +134,7 @@ public: return qualify_analysis_result.filter_actions != nullptr; } - const FilterAnalysisResult & getQualify() const + FilterAnalysisResult & getQualify() { return qualify_analysis_result; } @@ -149,7 +149,7 @@ public: return sort_analysis_result.before_order_by_actions != nullptr; } - const SortAnalysisResult & getSort() const + SortAnalysisResult & getSort() { return sort_analysis_result; } @@ -164,7 +164,7 @@ public: return limit_by_analysis_result.before_limit_by_actions != nullptr; } - const LimitByAnalysisResult & getLimitBy() const + LimitByAnalysisResult & getLimitBy() { return limit_by_analysis_result; } diff --git a/src/Planner/PlannerJoins.cpp b/src/Planner/PlannerJoins.cpp index 4cf1b138bed..5acff9dac82 100644 --- a/src/Planner/PlannerJoins.cpp +++ b/src/Planner/PlannerJoins.cpp @@ -591,10 +591,10 @@ JoinClausesAndActions buildJoinClausesAndActions( } } - result.left_join_expressions_actions = std::move(*ActionsDAG::clone(&left_join_actions)); + result.left_join_expressions_actions = left_join_actions.clone(); result.left_join_tmp_expression_actions = std::move(left_join_actions); result.left_join_expressions_actions.removeUnusedActions(join_left_actions_names); - result.right_join_expressions_actions = std::move(*ActionsDAG::clone(&right_join_actions)); + result.right_join_expressions_actions = right_join_actions.clone(); result.right_join_tmp_expression_actions = std::move(right_join_actions); result.right_join_expressions_actions.removeUnusedActions(join_right_actions_names); diff --git a/src/Processors/QueryPlan/ExpressionStep.cpp b/src/Processors/QueryPlan/ExpressionStep.cpp index 94098f443d9..6f88c4527a4 100644 --- a/src/Processors/QueryPlan/ExpressionStep.cpp +++ b/src/Processors/QueryPlan/ExpressionStep.cpp @@ -61,13 +61,13 @@ void ExpressionStep::transformPipeline(QueryPipelineBuilder & pipeline, const Bu void ExpressionStep::describeActions(FormatSettings & settings) const { String prefix(settings.offset, settings.indent_char); - auto expression = std::make_shared(std::move(*ActionsDAG::clone(&actions_dag))); + auto expression = std::make_shared(actions_dag.clone()); expression->describeActions(settings.out, prefix); } void ExpressionStep::describeActions(JSONBuilder::JSONMap & map) const { - auto expression = std::make_shared(std::move(*ActionsDAG::clone(&actions_dag))); + auto expression = std::make_shared(actions_dag.clone()); map.add("Expression", expression->toTree()); } diff --git a/src/Processors/QueryPlan/FilterStep.cpp b/src/Processors/QueryPlan/FilterStep.cpp index 5f15c5defac..0c6b71387b7 100644 --- a/src/Processors/QueryPlan/FilterStep.cpp +++ b/src/Processors/QueryPlan/FilterStep.cpp @@ -87,7 +87,7 @@ void FilterStep::describeActions(FormatSettings & settings) 
const settings.out << " (removed)"; settings.out << '\n'; - auto expression = std::make_shared(std::move(*ActionsDAG::clone(&actions_dag))); + auto expression = std::make_shared(actions_dag.clone()); expression->describeActions(settings.out, prefix); } @@ -96,7 +96,7 @@ void FilterStep::describeActions(JSONBuilder::JSONMap & map) const map.add("Filter Column", filter_column_name); map.add("Removes Filter", remove_filter_column); - auto expression = std::make_shared(std::move(*ActionsDAG::clone(&actions_dag))); + auto expression = std::make_shared(actions_dag.clone()); map.add("Expression", expression->toTree()); } diff --git a/src/Processors/QueryPlan/Optimizations/distinctReadInOrder.cpp b/src/Processors/QueryPlan/Optimizations/distinctReadInOrder.cpp index 8666912514e..37e61a6c388 100644 --- a/src/Processors/QueryPlan/Optimizations/distinctReadInOrder.cpp +++ b/src/Processors/QueryPlan/Optimizations/distinctReadInOrder.cpp @@ -10,18 +10,18 @@ namespace DB::QueryPlanOptimizations { /// build actions DAG from stack of steps -static ActionsDAGPtr buildActionsForPlanPath(std::vector & dag_stack) +static std::optional buildActionsForPlanPath(std::vector & dag_stack) { if (dag_stack.empty()) - return nullptr; + return {}; - ActionsDAGPtr path_actions = ActionsDAG::clone(dag_stack.back()); + ActionsDAG path_actions = dag_stack.back()->clone(); dag_stack.pop_back(); while (!dag_stack.empty()) { - ActionsDAGPtr clone = ActionsDAG::clone(dag_stack.back()); + ActionsDAG clone = dag_stack.back()->clone(); dag_stack.pop_back(); - path_actions->mergeInplace(std::move(*clone)); + path_actions.mergeInplace(std::move(clone)); } return path_actions; } diff --git a/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp b/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp index 411b20b1a32..73314f005b6 100644 --- a/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp +++ b/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp @@ -599,7 +599,7 @@ size_t tryPushDownFilter(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes filter_node.step = std::make_unique( filter_node.children.front()->step->getOutputStream(), - std::move(*ActionsDAG::clone(&filter->getExpression())), + filter->getExpression().clone(), filter->getFilterColumnName(), filter->removesFilterColumn()); } @@ -613,7 +613,7 @@ size_t tryPushDownFilter(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes if (auto * read_from_merge = typeid_cast(child.get())) { - FilterDAGInfo info{std::move(*ActionsDAG::clone(&filter->getExpression())), filter->getFilterColumnName(), filter->removesFilterColumn()}; + FilterDAGInfo info{filter->getExpression().clone(), filter->getFilterColumnName(), filter->removesFilterColumn()}; read_from_merge->addFilter(std::move(info)); std::swap(*parent_node, *child_node); return 1; diff --git a/src/Processors/QueryPlan/Optimizations/liftUpUnion.cpp b/src/Processors/QueryPlan/Optimizations/liftUpUnion.cpp index 53f59198d0f..c48551732c9 100644 --- a/src/Processors/QueryPlan/Optimizations/liftUpUnion.cpp +++ b/src/Processors/QueryPlan/Optimizations/liftUpUnion.cpp @@ -49,7 +49,7 @@ size_t tryLiftUpUnion(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes) expr_node.step = std::make_unique( expr_node.children.front()->step->getOutputStream(), - std::move(*ActionsDAG::clone(&expression->getExpression()))); + expression->getExpression().clone()); } /// - Expression - Something diff --git a/src/Processors/QueryPlan/Optimizations/optimizePrimaryKeyConditionAndLimit.cpp 
b/src/Processors/QueryPlan/Optimizations/optimizePrimaryKeyConditionAndLimit.cpp index 71a7ca327b1..63b4e019066 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizePrimaryKeyConditionAndLimit.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizePrimaryKeyConditionAndLimit.cpp @@ -18,16 +18,16 @@ void optimizePrimaryKeyConditionAndLimit(const Stack & stack) const auto & storage_prewhere_info = source_step_with_filter->getPrewhereInfo(); if (storage_prewhere_info) { - source_step_with_filter->addFilter(ActionsDAG::clone(&*storage_prewhere_info->prewhere_actions), storage_prewhere_info->prewhere_column_name); + source_step_with_filter->addFilter(std::make_unique(storage_prewhere_info->prewhere_actions->clone()), storage_prewhere_info->prewhere_column_name); if (storage_prewhere_info->row_level_filter) - source_step_with_filter->addFilter(ActionsDAG::clone(&*storage_prewhere_info->row_level_filter), storage_prewhere_info->row_level_column_name); + source_step_with_filter->addFilter(std::make_unique(storage_prewhere_info->row_level_filter->clone()), storage_prewhere_info->row_level_column_name); } for (auto iter = stack.rbegin() + 1; iter != stack.rend(); ++iter) { if (auto * filter_step = typeid_cast(iter->node->step.get())) { - source_step_with_filter->addFilter(ActionsDAG::clone(&filter_step->getExpression()), filter_step->getFilterColumnName()); + source_step_with_filter->addFilter(std::make_unique(filter_step->getExpression().clone()), filter_step->getFilterColumnName()); } else if (auto * limit_step = typeid_cast(iter->node->step.get())) { diff --git a/src/Processors/QueryPlan/Optimizations/optimizeReadInOrder.cpp b/src/Processors/QueryPlan/Optimizations/optimizeReadInOrder.cpp index 99aaef6d054..252420e19fe 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizeReadInOrder.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizeReadInOrder.cpp @@ -171,17 +171,17 @@ static void appendFixedColumnsFromFilterExpression(const ActionsDAG::Node & filt } } -static void appendExpression(ActionsDAGPtr & dag, const ActionsDAG & expression) +static void appendExpression(std::optional & dag, const ActionsDAG & expression) { if (dag) - dag->mergeInplace(std::move(*ActionsDAG::clone(&expression))); + dag->mergeInplace(expression.clone()); else - dag = ActionsDAG::clone(&expression); + dag = expression.clone(); } /// This function builds a common DAG which is a merge of DAGs from Filter and Expression steps chain. /// Additionally, build a set of fixed columns. 
-void buildSortingDAG(QueryPlan::Node & node, ActionsDAGPtr & dag, FixedColumns & fixed_columns, size_t & limit) +void buildSortingDAG(QueryPlan::Node & node, std::optional & dag, FixedColumns & fixed_columns, size_t & limit) { IQueryPlanStep * step = node.step.get(); if (auto * reading = typeid_cast(step)) @@ -330,7 +330,7 @@ void enreachFixedColumns(const ActionsDAG & dag, FixedColumns & fixed_columns) InputOrderInfoPtr buildInputOrderInfo( const FixedColumns & fixed_columns, - const ActionsDAGPtr & dag, + const std::optional & dag, const SortDescription & description, const KeyDescription & sorting_key, size_t limit) @@ -507,7 +507,7 @@ struct AggregationInputOrder AggregationInputOrder buildInputOrderInfo( const FixedColumns & fixed_columns, - const ActionsDAGPtr & dag, + const std::optional & dag, const Names & group_by_keys, const ActionsDAG & sorting_key_dag, const Names & sorting_key_columns) @@ -693,7 +693,7 @@ AggregationInputOrder buildInputOrderInfo( InputOrderInfoPtr buildInputOrderInfo( const ReadFromMergeTree * reading, const FixedColumns & fixed_columns, - const ActionsDAGPtr & dag, + const std::optional & dag, const SortDescription & description, size_t limit) { @@ -709,7 +709,7 @@ InputOrderInfoPtr buildInputOrderInfo( InputOrderInfoPtr buildInputOrderInfo( ReadFromMerge * merge, const FixedColumns & fixed_columns, - const ActionsDAGPtr & dag, + const std::optional & dag, const SortDescription & description, size_t limit) { @@ -745,7 +745,7 @@ InputOrderInfoPtr buildInputOrderInfo( AggregationInputOrder buildInputOrderInfo( ReadFromMergeTree * reading, const FixedColumns & fixed_columns, - const ActionsDAGPtr & dag, + const std::optional & dag, const Names & group_by_keys) { const auto & sorting_key = reading->getStorageMetadata()->getSortingKey(); @@ -760,7 +760,7 @@ AggregationInputOrder buildInputOrderInfo( AggregationInputOrder buildInputOrderInfo( ReadFromMerge * merge, const FixedColumns & fixed_columns, - const ActionsDAGPtr & dag, + const std::optional & dag, const Names & group_by_keys) { const auto & tables = merge->getSelectedTables(); @@ -801,7 +801,7 @@ InputOrderInfoPtr buildInputOrderInfo(SortingStep & sorting, QueryPlan::Node & n const auto & description = sorting.getSortDescription(); size_t limit = sorting.getLimit(); - ActionsDAGPtr dag; + std::optional dag; FixedColumns fixed_columns; buildSortingDAG(node, dag, fixed_columns, limit); @@ -855,7 +855,7 @@ AggregationInputOrder buildInputOrderInfo(AggregatingStep & aggregating, QueryPl const auto & keys = aggregating.getParams().keys; size_t limit = 0; - ActionsDAGPtr dag; + std::optional dag; FixedColumns fixed_columns; buildSortingDAG(node, dag, fixed_columns, limit); @@ -1076,13 +1076,13 @@ size_t tryReuseStorageOrderingForWindowFunctions(QueryPlan::Node * parent_node, for (const auto & actions_dag : window_desc.partition_by_actions) { order_by_elements_actions.emplace_back( - std::make_shared(std::move(*ActionsDAG::clone(actions_dag.get())), ExpressionActionsSettings::fromContext(context, CompileExpressions::yes))); + std::make_shared(actions_dag->clone(), ExpressionActionsSettings::fromContext(context, CompileExpressions::yes))); } for (const auto & actions_dag : window_desc.order_by_actions) { order_by_elements_actions.emplace_back( - std::make_shared(std::move(*ActionsDAG::clone(actions_dag.get())), ExpressionActionsSettings::fromContext(context, CompileExpressions::yes))); + std::make_shared(actions_dag->clone(), ExpressionActionsSettings::fromContext(context, CompileExpressions::yes))); } auto 
order_optimizer = std::make_shared( diff --git a/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp b/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp index 4448d4b7869..ad89cec5f79 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp @@ -43,7 +43,7 @@ static DAGIndex buildDAGIndex(const ActionsDAG & dag) /// Required analysis info from aggregate projection. struct AggregateProjectionInfo { - ActionsDAGPtr before_aggregation; + std::optional before_aggregation; Names keys; AggregateDescriptions aggregates; @@ -78,7 +78,7 @@ static AggregateProjectionInfo getAggregatingProjectionInfo( AggregateProjectionInfo info; info.context = interpreter.getContext(); - info.before_aggregation = ActionsDAG::clone(&analysis_result.before_aggregation->dag); + info.before_aggregation = analysis_result.before_aggregation->dag.clone(); info.keys = query_analyzer->aggregationKeys().getNames(); info.aggregates = query_analyzer->aggregates(); diff --git a/src/Processors/QueryPlan/Optimizations/projectionsCommon.cpp b/src/Processors/QueryPlan/Optimizations/projectionsCommon.cpp index ad76976becc..571d1dd0cc1 100644 --- a/src/Processors/QueryPlan/Optimizations/projectionsCommon.cpp +++ b/src/Processors/QueryPlan/Optimizations/projectionsCommon.cpp @@ -68,9 +68,9 @@ std::shared_ptr getMaxAddedBlocks(ReadFromMergeTree * rea void QueryDAG::appendExpression(const ActionsDAG & expression) { if (dag) - dag->mergeInplace(std::move(*ActionsDAG::clone(&expression))); + dag->mergeInplace(expression.clone()); else - dag = std::move(*ActionsDAG::clone(&expression)); + dag = expression.clone(); } const ActionsDAG::Node * findInOutputs(ActionsDAG & dag, const std::string & name, bool remove) @@ -239,7 +239,8 @@ bool analyzeProjectionCandidate( auto projection_query_info = query_info; projection_query_info.prewhere_info = nullptr; - projection_query_info.filter_actions_dag = ActionsDAG::clone(dag); + if (dag) + projection_query_info.filter_actions_dag = std::make_unique(dag->clone()); auto projection_result_ptr = reader.estimateNumMarksToRead( std::move(projection_parts), diff --git a/src/Processors/QueryPlan/Optimizations/removeRedundantDistinct.cpp b/src/Processors/QueryPlan/Optimizations/removeRedundantDistinct.cpp index d0acd8221d4..7664822cc7e 100644 --- a/src/Processors/QueryPlan/Optimizations/removeRedundantDistinct.cpp +++ b/src/Processors/QueryPlan/Optimizations/removeRedundantDistinct.cpp @@ -43,10 +43,10 @@ namespace } } - void logActionsDAG(const String & prefix, const ActionsDAGPtr & actions) + void logActionsDAG(const String & prefix, const ActionsDAG & actions) { if constexpr (debug_logging_enabled) - LOG_DEBUG(getLogger("redundantDistinct"), "{} :\n{}", prefix, actions->dumpDAG()); + LOG_DEBUG(getLogger("redundantDistinct"), "{} :\n{}", prefix, actions.dumpDAG()); } using DistinctColumns = std::set; @@ -65,19 +65,19 @@ namespace } /// build actions DAG from stack of steps - ActionsDAGPtr buildActionsForPlanPath(std::vector & dag_stack) + std::optional buildActionsForPlanPath(std::vector & dag_stack) { if (dag_stack.empty()) - return nullptr; + return {}; - ActionsDAGPtr path_actions = ActionsDAG::clone(dag_stack.back()); + ActionsDAG path_actions = dag_stack.back()->clone(); dag_stack.pop_back(); while (!dag_stack.empty()) { - ActionsDAGPtr clone = ActionsDAG::clone(dag_stack.back()); + ActionsDAG clone = dag_stack.back()->clone(); logActionsDAG("DAG to 
merge", clone); dag_stack.pop_back(); - path_actions->mergeInplace(std::move(*clone)); + path_actions.mergeInplace(std::move(clone)); } return path_actions; } @@ -260,15 +260,15 @@ namespace if (distinct_columns.size() != inner_distinct_columns.size()) return false; - ActionsDAGPtr path_actions; + ActionsDAG path_actions; if (!dag_stack.empty()) { /// build actions DAG to find original column names - path_actions = buildActionsForPlanPath(dag_stack); + path_actions = std::move(*buildActionsForPlanPath(dag_stack)); logActionsDAG("distinct pass: merged DAG", path_actions); /// compare columns of two DISTINCTs - FindOriginalNodeForOutputName original_node_finder(*path_actions); + FindOriginalNodeForOutputName original_node_finder(path_actions); for (const auto & column : distinct_columns) { const auto * alias_node = original_node_finder.find(String(column)); diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index add53f9d6b3..5dda4ddc18b 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -802,7 +802,7 @@ Pipe ReadFromMergeTree::spreadMarkRangesAmongStreams(RangesInDataParts && parts_ info.use_uncompressed_cache); }; - auto sorting_expr = std::make_shared(std::move(*ActionsDAG::clone(&metadata_for_reading->getSortingKey().expression->getActionsDAG()))); + auto sorting_expr = metadata_for_reading->getSortingKey().expression; SplitPartsWithRangesByPrimaryKeyResult split_ranges_result = splitPartsWithRangesByPrimaryKey( metadata_for_reading->getPrimaryKey(), @@ -1215,7 +1215,7 @@ Pipe ReadFromMergeTree::spreadMarkRangesAmongStreamsFinal( /// we will store lonely parts with level > 0 to use parallel select on them. RangesInDataParts non_intersecting_parts_by_primary_key; - auto sorting_expr = std::make_shared(std::move(*ActionsDAG::clone(&metadata_for_reading->getSortingKey().expression->getActionsDAG()))); + auto sorting_expr = metadata_for_reading->getSortingKey().expression; if (prewhere_info) { @@ -1523,7 +1523,7 @@ void ReadFromMergeTree::applyFilters(ActionDAGNodes added_filter_nodes) /// TODO: Get rid of filter_actions_dag in query_info after we move analysis of /// parallel replicas and unused shards into optimization, similar to projection analysis. 
if (filter_actions_dag) - query_info.filter_actions_dag = std::make_shared(std::move(*ActionsDAG::clone(&*filter_actions_dag))); + query_info.filter_actions_dag = std::make_shared(std::move(*filter_actions_dag)); buildIndexes( indexes, @@ -2004,7 +2004,7 @@ void ReadFromMergeTree::initializePipeline(QueryPipelineBuilder & pipeline, cons if (result.sampling.use_sampling) { - auto sampling_actions = std::make_shared(std::move(*ActionsDAG::clone(result.sampling.filter_expression.get()))); + auto sampling_actions = std::make_shared(result.sampling.filter_expression->clone()); pipe.addSimpleTransform([&](const Block & header) { return std::make_shared( @@ -2137,7 +2137,7 @@ void ReadFromMergeTree::describeActions(FormatSettings & format_settings) const format_settings.out << " (removed)"; format_settings.out << '\n'; - auto expression = std::make_shared(std::move(*ActionsDAG::clone(&*prewhere_info->prewhere_actions))); + auto expression = std::make_shared(prewhere_info->prewhere_actions->clone()); expression->describeActions(format_settings.out, prefix); } @@ -2146,7 +2146,7 @@ void ReadFromMergeTree::describeActions(FormatSettings & format_settings) const format_settings.out << prefix << "Row level filter" << '\n'; format_settings.out << prefix << "Row level filter column: " << prewhere_info->row_level_column_name << '\n'; - auto expression = std::make_shared(std::move(*ActionsDAG::clone(&*prewhere_info->row_level_filter))); + auto expression = std::make_shared(prewhere_info->row_level_filter->clone()); expression->describeActions(format_settings.out, prefix); } } @@ -2172,7 +2172,7 @@ void ReadFromMergeTree::describeActions(JSONBuilder::JSONMap & map) const std::unique_ptr prewhere_filter_map = std::make_unique(); prewhere_filter_map->add("Prewhere filter column", prewhere_info->prewhere_column_name); prewhere_filter_map->add("Prewhere filter remove filter column", prewhere_info->remove_prewhere_column); - auto expression = std::make_shared(std::move(*ActionsDAG::clone(&*prewhere_info->prewhere_actions))); + auto expression = std::make_shared(prewhere_info->prewhere_actions->clone()); prewhere_filter_map->add("Prewhere filter expression", expression->toTree()); prewhere_info_map->add("Prewhere filter", std::move(prewhere_filter_map)); @@ -2182,7 +2182,7 @@ void ReadFromMergeTree::describeActions(JSONBuilder::JSONMap & map) const { std::unique_ptr row_level_filter_map = std::make_unique(); row_level_filter_map->add("Row level filter column", prewhere_info->row_level_column_name); - auto expression = std::make_shared(std::move(*ActionsDAG::clone(&*prewhere_info->row_level_filter))); + auto expression = std::make_shared(prewhere_info->row_level_filter->clone()); row_level_filter_map->add("Row level filter expression", expression->toTree()); prewhere_info_map->add("Row level filter", std::move(row_level_filter_map)); diff --git a/src/Processors/QueryPlan/SourceStepWithFilter.cpp b/src/Processors/QueryPlan/SourceStepWithFilter.cpp index 55c9b5e442e..b91debc8239 100644 --- a/src/Processors/QueryPlan/SourceStepWithFilter.cpp +++ b/src/Processors/QueryPlan/SourceStepWithFilter.cpp @@ -110,7 +110,7 @@ void SourceStepWithFilter::describeActions(FormatSettings & format_settings) con format_settings.out << " (removed)"; format_settings.out << '\n'; - auto expression = std::make_shared(std::move(*ActionsDAG::clone(&*prewhere_info->prewhere_actions))); + auto expression = std::make_shared(prewhere_info->prewhere_actions->clone()); expression->describeActions(format_settings.out, prefix); } @@ -119,7 +119,7 
@@ void SourceStepWithFilter::describeActions(FormatSettings & format_settings) con format_settings.out << prefix << "Row level filter" << '\n'; format_settings.out << prefix << "Row level filter column: " << prewhere_info->row_level_column_name << '\n'; - auto expression = std::make_shared(std::move(*ActionsDAG::clone(&*prewhere_info->row_level_filter))); + auto expression = std::make_shared(prewhere_info->row_level_filter->clone()); expression->describeActions(format_settings.out, prefix); } } @@ -137,7 +137,7 @@ void SourceStepWithFilter::describeActions(JSONBuilder::JSONMap & map) const std::unique_ptr prewhere_filter_map = std::make_unique(); prewhere_filter_map->add("Prewhere filter column", prewhere_info->prewhere_column_name); prewhere_filter_map->add("Prewhere filter remove filter column", prewhere_info->remove_prewhere_column); - auto expression = std::make_shared(std::move(*ActionsDAG::clone(&*prewhere_info->prewhere_actions))); + auto expression = std::make_shared(prewhere_info->prewhere_actions->clone()); prewhere_filter_map->add("Prewhere filter expression", expression->toTree()); prewhere_info_map->add("Prewhere filter", std::move(prewhere_filter_map)); @@ -147,7 +147,7 @@ void SourceStepWithFilter::describeActions(JSONBuilder::JSONMap & map) const { std::unique_ptr row_level_filter_map = std::make_unique(); row_level_filter_map->add("Row level filter column", prewhere_info->row_level_column_name); - auto expression = std::make_shared(std::move(*ActionsDAG::clone(&*prewhere_info->row_level_filter))); + auto expression = std::make_shared(prewhere_info->row_level_filter->clone()); row_level_filter_map->add("Row level filter expression", expression->toTree()); prewhere_info_map->add("Row level filter", std::move(row_level_filter_map)); diff --git a/src/Processors/QueryPlan/TotalsHavingStep.cpp b/src/Processors/QueryPlan/TotalsHavingStep.cpp index 70457918de1..2554053064f 100644 --- a/src/Processors/QueryPlan/TotalsHavingStep.cpp +++ b/src/Processors/QueryPlan/TotalsHavingStep.cpp @@ -101,13 +101,16 @@ void TotalsHavingStep::describeActions(FormatSettings & settings) const if (actions_dag) { bool first = true; - auto expression = std::make_shared(std::move(*ActionsDAG::clone(getActions()))); - for (const auto & action : expression->getActions()) + if (actions_dag) { - settings.out << prefix << (first ? "Actions: " - : " "); - first = false; - settings.out << action.toString() << '\n'; + auto expression = std::make_shared(actions_dag->clone()); + for (const auto & action : expression->getActions()) + { + settings.out << prefix << (first ? 
"Actions: " + : " "); + first = false; + settings.out << action.toString() << '\n'; + } } } } @@ -118,8 +121,11 @@ void TotalsHavingStep::describeActions(JSONBuilder::JSONMap & map) const if (actions_dag) { map.add("Filter column", filter_column_name); - auto expression = std::make_shared(std::move(*ActionsDAG::clone(getActions()))); - map.add("Expression", expression->toTree()); + if (actions_dag) + { + auto expression = std::make_shared(actions_dag->clone()); + map.add("Expression", expression->toTree()); + } } } diff --git a/src/Processors/Transforms/FillingTransform.cpp b/src/Processors/Transforms/FillingTransform.cpp index 36ffc515f43..9601f821cc8 100644 --- a/src/Processors/Transforms/FillingTransform.cpp +++ b/src/Processors/Transforms/FillingTransform.cpp @@ -203,7 +203,7 @@ FillingTransform::FillingTransform( , use_with_fill_by_sorting_prefix(use_with_fill_by_sorting_prefix_) { if (interpolate_description) - interpolate_actions = std::make_shared(std::move(*ActionsDAG::clone(&interpolate_description->actions))); + interpolate_actions = std::make_shared(interpolate_description->actions.clone()); std::vector is_fill_column(header_.columns()); for (size_t i = 0, size = fill_description.size(); i < size; ++i) diff --git a/src/Storages/MergeTree/MergeTreeIndexSet.cpp b/src/Storages/MergeTree/MergeTreeIndexSet.cpp index ca31ffc9de5..c0875ed184d 100644 --- a/src/Storages/MergeTree/MergeTreeIndexSet.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexSet.cpp @@ -265,7 +265,7 @@ MergeTreeIndexConditionSet::MergeTreeIndexConditionSet( if (!set->buildOrderedSetInplace(context)) return; - auto filter_actions_dag = std::move(*ActionsDAG::clone(filter_dag)); + auto filter_actions_dag = filter_dag->clone(); const auto * filter_actions_dag_node = filter_actions_dag.getOutputs().at(0); std::unordered_map node_to_result_node; @@ -319,7 +319,7 @@ static const ActionsDAG::NodeRawConstPtrs & getArguments(const ActionsDAG::Node return index_hint.getActions().getOutputs(); /// Import the DAG and map argument pointers. 
- auto actions_clone = std::move(*ActionsDAG::clone(&index_hint.getActions())); + auto actions_clone = index_hint.getActions().clone(); chassert(storage); result_dag_or_null->mergeNodes(std::move(actions_clone), storage); return *storage; diff --git a/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp b/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp index 22289187cfa..f1df9e231c4 100644 --- a/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp +++ b/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp @@ -80,7 +80,7 @@ PrewhereExprInfo MergeTreeSelectProcessor::getPrewhereActions(PrewhereInfoPtr pr PrewhereExprStep row_level_filter_step { .type = PrewhereExprStep::Filter, - .actions = std::make_shared(std::move(*ActionsDAG::clone(&*prewhere_info->row_level_filter)), actions_settings), + .actions = std::make_shared(prewhere_info->row_level_filter->clone(), actions_settings), .filter_column_name = prewhere_info->row_level_column_name, .remove_filter_column = true, .need_filter = true, @@ -96,7 +96,7 @@ PrewhereExprInfo MergeTreeSelectProcessor::getPrewhereActions(PrewhereInfoPtr pr PrewhereExprStep prewhere_step { .type = PrewhereExprStep::Filter, - .actions = std::make_shared(std::move(*ActionsDAG::clone(&*prewhere_info->prewhere_actions)), actions_settings), + .actions = std::make_shared(prewhere_info->prewhere_actions->clone(), actions_settings), .filter_column_name = prewhere_info->prewhere_column_name, .remove_filter_column = prewhere_info->remove_prewhere_column, .need_filter = prewhere_info->need_filter, diff --git a/src/Storages/SelectQueryInfo.h b/src/Storages/SelectQueryInfo.h index 589698fcc30..60f103fdb70 100644 --- a/src/Storages/SelectQueryInfo.h +++ b/src/Storages/SelectQueryInfo.h @@ -66,10 +66,10 @@ struct PrewhereInfo PrewhereInfoPtr prewhere_info = std::make_shared(); if (row_level_filter) - prewhere_info->row_level_filter = std::move(*ActionsDAG::clone(&*row_level_filter)); + prewhere_info->row_level_filter = row_level_filter->clone(); if (prewhere_actions) - prewhere_info->prewhere_actions = std::move(*ActionsDAG::clone(&*prewhere_actions)); + prewhere_info->prewhere_actions = prewhere_actions->clone(); prewhere_info->row_level_column_name = row_level_column_name; prewhere_info->prewhere_column_name = prewhere_column_name; diff --git a/src/Storages/StorageBuffer.cpp b/src/Storages/StorageBuffer.cpp index c096504170e..aee4e4683ad 100644 --- a/src/Storages/StorageBuffer.cpp +++ b/src/Storages/StorageBuffer.cpp @@ -313,7 +313,7 @@ void StorageBuffer::read( if (src_table_query_info.prewhere_info->row_level_filter) { src_table_query_info.prewhere_info->row_level_filter = ActionsDAG::merge( - std::move(*ActionsDAG::clone(&actions_dag)), + actions_dag.clone(), std::move(*src_table_query_info.prewhere_info->row_level_filter)); src_table_query_info.prewhere_info->row_level_filter->removeUnusedActions(); @@ -322,7 +322,7 @@ void StorageBuffer::read( if (src_table_query_info.prewhere_info->prewhere_actions) { src_table_query_info.prewhere_info->prewhere_actions = ActionsDAG::merge( - std::move(*ActionsDAG::clone(&actions_dag)), + actions_dag.clone(), std::move(*src_table_query_info.prewhere_info->prewhere_actions)); src_table_query_info.prewhere_info->prewhere_actions->removeUnusedActions(); @@ -429,21 +429,23 @@ void StorageBuffer::read( if (query_info.prewhere_info->row_level_filter) { + auto actions = std::make_shared(query_info.prewhere_info->row_level_filter->clone(), actions_settings); pipe_from_buffers.addSimpleTransform([&](const Block & header) { return 
std::make_shared( header, - std::make_shared(std::move(*ActionsDAG::clone(&*query_info.prewhere_info->row_level_filter)), actions_settings), + actions, query_info.prewhere_info->row_level_column_name, false); }); } + auto actions = std::make_shared(query_info.prewhere_info->prewhere_actions->clone(), actions_settings); pipe_from_buffers.addSimpleTransform([&](const Block & header) { return std::make_shared( header, - std::make_shared(std::move(*ActionsDAG::clone(&*query_info.prewhere_info->prewhere_actions)), actions_settings), + actions, query_info.prewhere_info->prewhere_column_name, query_info.prewhere_info->remove_prewhere_column); }); diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index e16e2a07685..0e1568c8e79 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -663,7 +663,7 @@ std::vector ReadFromMerge::createChildrenPlans(SelectQ { auto filter_step = std::make_unique( child.plan.getCurrentDataStream(), - std::move(*ActionsDAG::clone(&filter_info.actions)), + filter_info.actions.clone(), filter_info.column_name, filter_info.do_remove_column); @@ -1241,7 +1241,7 @@ ReadFromMerge::RowPolicyData::RowPolicyData(RowPolicyFilterPtr row_policy_filter auto expression_analyzer = ExpressionAnalyzer{expr, syntax_result, local_context}; actions_dag = expression_analyzer.getActionsDAG(false /* add_aliases */, false /* project_result */); - filter_actions = std::make_shared(std::move(*ActionsDAG::clone(&actions_dag)), + filter_actions = std::make_shared(actions_dag.clone(), ExpressionActionsSettings::fromContext(local_context, CompileExpressions::yes)); const auto & required_columns = filter_actions->getRequiredColumnsWithTypes(); const auto & sample_block_columns = filter_actions->getSampleBlock().getNamesAndTypesList(); @@ -1279,12 +1279,12 @@ void ReadFromMerge::RowPolicyData::extendNames(Names & names) const void ReadFromMerge::RowPolicyData::addStorageFilter(SourceStepWithFilter * step) const { - step->addFilter(ActionsDAG::clone(&actions_dag), filter_column_name); + step->addFilter(std::make_unique(actions_dag.clone()), filter_column_name); } void ReadFromMerge::RowPolicyData::addFilterTransform(QueryPlan & plan) const { - auto filter_step = std::make_unique(plan.getCurrentDataStream(), std::move(*ActionsDAG::clone(&actions_dag)), filter_column_name, true /* remove filter column */); + auto filter_step = std::make_unique(plan.getCurrentDataStream(), actions_dag.clone(), filter_column_name, true /* remove filter column */); plan.addStep(std::move(filter_step)); } diff --git a/src/Storages/VirtualColumnUtils.cpp b/src/Storages/VirtualColumnUtils.cpp index 32c6a558340..146a54eda78 100644 --- a/src/Storages/VirtualColumnUtils.cpp +++ b/src/Storages/VirtualColumnUtils.cpp @@ -323,7 +323,7 @@ static const ActionsDAG::Node * splitFilterNodeForAllowedInputs( { if (const auto * index_hint = typeid_cast(adaptor->getFunction().get())) { - auto index_hint_dag = std::move(*ActionsDAG::clone(&index_hint->getActions())); + auto index_hint_dag = index_hint->getActions().clone(); ActionsDAG::NodeRawConstPtrs atoms; for (const auto & output : index_hint_dag.getOutputs()) if (const auto * child_copy = splitFilterNodeForAllowedInputs(output, allowed_inputs, additional_nodes)) From 7fc8ee726e3ef2dfb7d778fbb1a70fb147a33067 Mon Sep 17 00:00:00 2001 From: Michael Stetsyuk Date: Thu, 18 Jul 2024 12:32:16 +0200 Subject: [PATCH 071/661] add replication lag and recovery time metrics --- src/Databases/DatabaseReplicated.cpp | 44 +++++++++---------- 
src/Databases/DatabaseReplicated.h | 10 ++++- src/Databases/DatabaseReplicatedWorker.cpp | 4 ++ src/Databases/DatabaseReplicatedWorker.h | 4 ++ src/Storages/System/StorageSystemClusters.cpp | 33 +++++++++----- src/Storages/System/StorageSystemClusters.h | 2 +- .../test_recovery_time_metric/__init__.py | 0 .../configs/config.xml | 41 +++++++++++++++++ .../test_recovery_time_metric/test.py | 26 +++++++++++ 9 files changed, 129 insertions(+), 35 deletions(-) create mode 100644 tests/integration/test_recovery_time_metric/__init__.py create mode 100644 tests/integration/test_recovery_time_metric/configs/config.xml create mode 100644 tests/integration/test_recovery_time_metric/test.py diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp index 7ce2859e962..b11b9382732 100644 --- a/src/Databases/DatabaseReplicated.cpp +++ b/src/Databases/DatabaseReplicated.cpp @@ -338,42 +338,40 @@ ClusterPtr DatabaseReplicated::getClusterImpl(bool all_groups) const return std::make_shared(getContext()->getSettingsRef(), shards, params); } -std::vector DatabaseReplicated::tryGetAreReplicasActive(const ClusterPtr & cluster_) const +ReplicasInfo DatabaseReplicated::tryGetReplicasInfo(const ClusterPtr & cluster_) const { - Strings paths; + ReplicasInfo res; + + auto zookeeper = getZooKeeper(); const auto & addresses_with_failover = cluster_->getShardsAddresses(); const auto & shards_info = cluster_->getShardsInfo(); - for (size_t shard_index = 0; shard_index < shards_info.size(); ++shard_index) - { - for (const auto & replica : addresses_with_failover[shard_index]) - { - String full_name = getFullReplicaName(replica.database_shard_name, replica.database_replica_name); - paths.emplace_back(fs::path(zookeeper_path) / "replicas" / full_name / "active"); - } - } try { - auto current_zookeeper = getZooKeeper(); - auto res = current_zookeeper->exists(paths); + UInt32 max_log_ptr = parse(zookeeper->get(zookeeper_path + "/max_log_ptr")); - std::vector statuses; - statuses.resize(paths.size()); - - for (size_t i = 0; i < res.size(); ++i) - if (res[i].error == Coordination::Error::ZOK) - statuses[i] = 1; - - return statuses; - } - catch (...) + for (size_t shard_index = 0; shard_index < shards_info.size(); ++shard_index) + { + for (const auto & replica : addresses_with_failover[shard_index]) + { + String full_name = getFullReplicaName(replica.database_shard_name, replica.database_replica_name); + UInt32 log_ptr = parse(zookeeper->get(fs::path(zookeeper_path) / "replicas" / full_name / "log_ptr")); + bool is_active = zookeeper->exists(fs::path(zookeeper_path) / "replicas" / full_name / "active"); + res.push_back(ReplicaInfo{ + .is_active = is_active, + .replication_lag = max_log_ptr - log_ptr, + .recovery_time = replica.is_local ? ddl_worker->getCurrentInitializationDurationMs() : 0, + }); + } + } + return res; + } catch (...) 
     {
         tryLogCurrentException(log);
         return {};
     }
 }

-
 void DatabaseReplicated::fillClusterAuthInfo(String collection_name, const Poco::Util::AbstractConfiguration & config_ref)
 {
     const auto & config_prefix = fmt::format("named_collections.{}", collection_name);
diff --git a/src/Databases/DatabaseReplicated.h b/src/Databases/DatabaseReplicated.h
index eab5b2ff931..db02b5ef30f 100644
--- a/src/Databases/DatabaseReplicated.h
+++ b/src/Databases/DatabaseReplicated.h
@@ -17,6 +17,14 @@ using ZooKeeperPtr = std::shared_ptr;
 class Cluster;
 using ClusterPtr = std::shared_ptr;

+struct ReplicaInfo
+{
+    bool is_active;
+    UInt32 replication_lag;
+    UInt64 recovery_time;
+};
+using ReplicasInfo = std::vector;
+
 class DatabaseReplicated : public DatabaseAtomic
 {
 public:
@@ -84,7 +92,7 @@ public:
     static void dropReplica(DatabaseReplicated * database, const String & database_zookeeper_path, const String & shard, const String & replica, bool throw_if_noop);

-    std::vector tryGetAreReplicasActive(const ClusterPtr & cluster_) const;
+    ReplicasInfo tryGetReplicasInfo(const ClusterPtr & cluster_) const;

     void renameDatabase(ContextPtr query_context, const String & new_name) override;
diff --git a/src/Databases/DatabaseReplicatedWorker.cpp b/src/Databases/DatabaseReplicatedWorker.cpp
index 1ef88dc03bc..cea2d123f87 100644
--- a/src/Databases/DatabaseReplicatedWorker.cpp
+++ b/src/Databases/DatabaseReplicatedWorker.cpp
@@ -32,6 +32,8 @@ DatabaseReplicatedDDLWorker::DatabaseReplicatedDDLWorker(DatabaseReplicated * db
 bool DatabaseReplicatedDDLWorker::initializeMainThread()
 {
+    initialization_duration_timer.emplace();
+
     while (!stop_flag)
     {
         try
         {
@@ -69,6 +71,7 @@ bool DatabaseReplicatedDDLWorker::initializeMainThread()
             initializeReplication();
             initialized = true;
+            initialization_duration_timer.reset();
             return true;
         }
         catch (...)
         {
@@ -78,6 +81,7 @@ bool DatabaseReplicatedDDLWorker::initializeMainThread()
         }
     }

+    initialization_duration_timer.reset();
     return false;
 }
diff --git a/src/Databases/DatabaseReplicatedWorker.h b/src/Databases/DatabaseReplicatedWorker.h
index 41edf2221b8..aea3b71173d 100644
--- a/src/Databases/DatabaseReplicatedWorker.h
+++ b/src/Databases/DatabaseReplicatedWorker.h
@@ -36,6 +36,8 @@ public:
         DatabaseReplicated * const database, bool committed = false); /// NOLINT

     UInt32 getLogPointer() const;
+
+    UInt64 getCurrentInitializationDurationMs() const { return initialization_duration_timer ?
initialization_duration_timer->elapsedMilliseconds() : 0; } private: bool initializeMainThread() override; void initializeReplication(); @@ -56,6 +58,8 @@ private: ZooKeeperPtr active_node_holder_zookeeper; /// It will remove "active" node when database is detached zkutil::EphemeralNodeHolderPtr active_node_holder; + + std::optional<Stopwatch> initialization_duration_timer; }; } diff --git a/src/Storages/System/StorageSystemClusters.cpp b/src/Storages/System/StorageSystemClusters.cpp index 160c8d6270e..0da4bd70cbd 100644 --- a/src/Storages/System/StorageSystemClusters.cpp +++ b/src/Storages/System/StorageSystemClusters.cpp @@ -31,6 +31,8 @@ ColumnsDescription StorageSystemClusters::getColumnsDescription() {"database_shard_name", std::make_shared<DataTypeString>(), "The name of the `Replicated` database shard (for clusters that belong to a `Replicated` database)."}, {"database_replica_name", std::make_shared<DataTypeString>(), "The name of the `Replicated` database replica (for clusters that belong to a `Replicated` database)."}, {"is_active", std::make_shared<DataTypeNullable>(std::make_shared<DataTypeUInt8>()), "The status of the Replicated database replica (for clusters that belong to a Replicated database): 1 means 'replica is online', 0 means 'replica is offline', NULL means 'unknown'."}, + {"replication_lag", std::make_shared<DataTypeNullable>(std::make_shared<DataTypeUInt32>()), "The replication lag of the `Replicated` database replica (for clusters that belong to a Replicated database)."}, + {"recovery_time", std::make_shared<DataTypeNullable>(std::make_shared<DataTypeUInt64>()), "The recovery time of the `Replicated` database replica (for clusters that belong to a Replicated database), in milliseconds."}, }; description.setAliases({ @@ -46,24 +48,23 @@ void StorageSystemClusters::fillData(MutableColumns & res_columns, ContextPtr co writeCluster(res_columns, name_and_cluster, {}); const auto databases = DatabaseCatalog::instance().getDatabases(); - for (const auto & name_and_database : databases) + for (const auto & [database_name, database] : databases) { - if (const auto * replicated = typeid_cast<const DatabaseReplicated *>(name_and_database.second.get())) + if (const auto * replicated = typeid_cast<const DatabaseReplicated *>(database.get())) { - if (auto database_cluster = replicated->tryGetCluster()) - writeCluster(res_columns, {name_and_database.first, database_cluster}, - replicated->tryGetAreReplicasActive(database_cluster)); + if (auto database_cluster = replicated->tryGetCluster()) + writeCluster(res_columns, {database_name, database_cluster}, + replicated->tryGetReplicasInfo(database_cluster)); if (auto database_cluster = replicated->tryGetAllGroupsCluster()) - writeCluster(res_columns, {DatabaseReplicated::ALL_GROUPS_CLUSTER_PREFIX + name_and_database.first, database_cluster}, - replicated->tryGetAreReplicasActive(database_cluster)); + writeCluster(res_columns, {DatabaseReplicated::ALL_GROUPS_CLUSTER_PREFIX + database_name, database_cluster}, + replicated->tryGetReplicasInfo(database_cluster)); } } } void StorageSystemClusters::writeCluster(MutableColumns & res_columns, const NameAndCluster & name_and_cluster, - const std::vector<UInt8> & is_active) + const ReplicasInfo & replicas_info) { const String & cluster_name = name_and_cluster.first; const ClusterPtr & cluster = name_and_cluster.second; @@ -99,10 +100,22 @@ void StorageSystemClusters::writeCluster(MutableColumns & res_columns, const Nam res_columns[i++]->insert(pool_status[replica_index].estimated_recovery_time.count()); res_columns[i++]->insert(address.database_shard_name); res_columns[i++]->insert(address.database_replica_name); - if (is_active.empty()) + if (replicas_info.empty()) + { res_columns[i++]->insertDefault(); + res_columns[i++]->insertDefault(); +
res_columns[i++]->insertDefault(); + } else - res_columns[i++]->insert(is_active[replica_idx++]); + { + const auto & replica_info = replicas_info[replica_idx++]; + res_columns[i++]->insert(replica_info.is_active); + res_columns[i++]->insert(replica_info.replication_lag); + if (replica_info.recovery_time != 0) + res_columns[i++]->insert(replica_info.recovery_time); + else + res_columns[i++]->insertDefault(); + } } } } diff --git a/src/Storages/System/StorageSystemClusters.h b/src/Storages/System/StorageSystemClusters.h index 0f7c792261d..ead123aa79e 100644 --- a/src/Storages/System/StorageSystemClusters.h +++ b/src/Storages/System/StorageSystemClusters.h @@ -27,7 +27,7 @@ protected: using NameAndCluster = std::pair>; void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const override; - static void writeCluster(MutableColumns & res_columns, const NameAndCluster & name_and_cluster, const std::vector & is_active); + static void writeCluster(MutableColumns & res_columns, const NameAndCluster & name_and_cluster, const ReplicasInfo & replicas_info); }; } diff --git a/tests/integration/test_recovery_time_metric/__init__.py b/tests/integration/test_recovery_time_metric/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_recovery_time_metric/configs/config.xml b/tests/integration/test_recovery_time_metric/configs/config.xml new file mode 100644 index 00000000000..bad9b1fa9ea --- /dev/null +++ b/tests/integration/test_recovery_time_metric/configs/config.xml @@ -0,0 +1,41 @@ + + 9000 + + + + + + + + + default + + + + + + 2181 + 1 + /var/lib/clickhouse/coordination/log + /var/lib/clickhouse/coordination/snapshots + + 20000 + + + + 1 + localhost + 9444 + + + + + + + localhost + 2181 + + 20000 + + + diff --git a/tests/integration/test_recovery_time_metric/test.py b/tests/integration/test_recovery_time_metric/test.py new file mode 100644 index 00000000000..9ceb0cce288 --- /dev/null +++ b/tests/integration/test_recovery_time_metric/test.py @@ -0,0 +1,26 @@ +import pytest +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) +node = cluster.add_instance("node", main_configs=["configs/config.xml"], with_zookeeper=True) + + +@pytest.fixture(scope="module") +def start_cluster(): + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + + +def test_recovery_time_metric(start_cluster): + node.query("CREATE DATABASE rdb ENGINE = Replicated('/test/test_recovery_time_metric', 'shard1', 'replica1');") + node.query("CREATE TABLE rdb.t (x UInt32) ENGINE = MergeTree ORDER BY x;") + node.exec_in_container(["bash", "-c", "rm /var/lib/clickhouse/metadata/rdb/t.sql"]) + node.restart_clickhouse() + assert ( + node.query("SELECT any(recovery_time) FROM system.clusters;") + != "0\n" + ) + From 392183832834bf7041a0cae4fd27fc1112f51bc2 Mon Sep 17 00:00:00 2001 From: avogar Date: Thu, 18 Jul 2024 16:45:26 +0000 Subject: [PATCH 072/661] Fix runtime error while converting [Null] field to Array(Variant) --- src/Interpreters/convertFieldToType.cpp | 10 +++++----- .../03203_variant_convert_field_to_type_bug.reference | 0 .../03203_variant_convert_field_to_type_bug.sql | 5 +++++ 3 files changed, 10 insertions(+), 5 deletions(-) create mode 100644 tests/queries/0_stateless/03203_variant_convert_field_to_type_bug.reference create mode 100644 tests/queries/0_stateless/03203_variant_convert_field_to_type_bug.sql diff --git a/src/Interpreters/convertFieldToType.cpp 
b/src/Interpreters/convertFieldToType.cpp index 184c263dbdb..9ee214f4415 100644 --- a/src/Interpreters/convertFieldToType.cpp +++ b/src/Interpreters/convertFieldToType.cpp @@ -356,7 +356,7 @@ Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const ID for (size_t i = 0; i < src_arr_size; ++i) { res[i] = convertFieldToType(src_arr[i], element_type); - if (res[i].isNull() && !element_type.isNullable()) + if (res[i].isNull() && !canContainNull(element_type)) { // See the comment for Tuples below. have_unconvertible_element = true; @@ -384,7 +384,7 @@ Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const ID { const auto & element_type = *(type_tuple->getElements()[i]); res[i] = convertFieldToType(src_tuple[i], element_type); - if (!res[i].isNull() || element_type.isNullable()) + if (!res[i].isNull() || canContainNull(element_type)) continue; /* @@ -433,11 +433,11 @@ Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const ID updated_entry[0] = convertFieldToType(key, key_type); - if (updated_entry[0].isNull() && !key_type.isNullable()) + if (updated_entry[0].isNull() && !canContainNull(key_type)) have_unconvertible_element = true; updated_entry[1] = convertFieldToType(value, value_type); - if (updated_entry[1].isNull() && !value_type.isNullable()) + if (updated_entry[1].isNull() && !canContainNull(value_type)) have_unconvertible_element = true; res[i] = updated_entry; @@ -592,7 +592,7 @@ Field convertFieldToType(const Field & from_value, const IDataType & to_type, co Field convertFieldToTypeOrThrow(const Field & from_value, const IDataType & to_type, const IDataType * from_type_hint) { bool is_null = from_value.isNull(); - if (is_null && !to_type.isNullable() && !to_type.isLowCardinalityNullable()) + if (is_null && !canContainNull(to_type)) throw Exception(ErrorCodes::TYPE_MISMATCH, "Cannot convert NULL to {}", to_type.getName()); Field converted = convertFieldToType(from_value, to_type, from_type_hint); diff --git a/tests/queries/0_stateless/03203_variant_convert_field_to_type_bug.reference b/tests/queries/0_stateless/03203_variant_convert_field_to_type_bug.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03203_variant_convert_field_to_type_bug.sql b/tests/queries/0_stateless/03203_variant_convert_field_to_type_bug.sql new file mode 100644 index 00000000000..b73bb8ffa6d --- /dev/null +++ b/tests/queries/0_stateless/03203_variant_convert_field_to_type_bug.sql @@ -0,0 +1,5 @@ +set allow_experimental_variant_type=1; +set use_variant_as_common_type=1; + +SELECT * FROM numbers([tuple(1, 2), NULL], 2); -- {serverError ILLEGAL_TYPE_OF_ARGUMENT} + From a6d4db342b2fc83e385d549ba5ce9ebf9e63064e Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Thu, 18 Jul 2024 16:45:40 +0000 Subject: [PATCH 073/661] Automatic style fix --- .../integration/test_recovery_time_metric/test.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/integration/test_recovery_time_metric/test.py b/tests/integration/test_recovery_time_metric/test.py index 9ceb0cce288..90155f81ba2 100644 --- a/tests/integration/test_recovery_time_metric/test.py +++ b/tests/integration/test_recovery_time_metric/test.py @@ -2,7 +2,9 @@ import pytest from helpers.cluster import ClickHouseCluster cluster = ClickHouseCluster(__file__) -node = cluster.add_instance("node", main_configs=["configs/config.xml"], with_zookeeper=True) +node = cluster.add_instance( + "node", 
main_configs=["configs/config.xml"], with_zookeeper=True +) @pytest.fixture(scope="module") @@ -15,12 +17,10 @@ def start_cluster(): def test_recovery_time_metric(start_cluster): - node.query("CREATE DATABASE rdb ENGINE = Replicated('/test/test_recovery_time_metric', 'shard1', 'replica1');") + node.query( + "CREATE DATABASE rdb ENGINE = Replicated('/test/test_recovery_time_metric', 'shard1', 'replica1');" + ) node.query("CREATE TABLE rdb.t (x UInt32) ENGINE = MergeTree ORDER BY x;") node.exec_in_container(["bash", "-c", "rm /var/lib/clickhouse/metadata/rdb/t.sql"]) node.restart_clickhouse() - assert ( - node.query("SELECT any(recovery_time) FROM system.clusters;") - != "0\n" - ) - + assert node.query("SELECT any(recovery_time) FROM system.clusters;") != "0\n" From 225af356c7ca3fd3c401f0fa8273d3dd751297fa Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 18 Jul 2024 17:06:03 +0000 Subject: [PATCH 074/661] Better. --- src/Interpreters/ActionsDAG.cpp | 1 - src/Interpreters/ActionsDAG.h | 1 - src/Interpreters/ActionsVisitor.h | 8 +------- src/Interpreters/ExpressionAnalyzer.h | 3 --- src/Interpreters/WindowDescription.h | 1 - src/Interpreters/addMissingDefaults.h | 6 ------ 6 files changed, 1 insertion(+), 19 deletions(-) diff --git a/src/Interpreters/ActionsDAG.cpp b/src/Interpreters/ActionsDAG.cpp index 53e04f24829..85b2b38da17 100644 --- a/src/Interpreters/ActionsDAG.cpp +++ b/src/Interpreters/ActionsDAG.cpp @@ -3150,7 +3150,6 @@ const ActionsDAG::Node * FindOriginalNodeForOutputName::find(const String & outp } FindAliasForInputName::FindAliasForInputName(const ActionsDAG & actions_) - //: actions(actions_) { const auto & actions_outputs = actions_.getOutputs(); for (const auto * output_node : actions_outputs) diff --git a/src/Interpreters/ActionsDAG.h b/src/Interpreters/ActionsDAG.h index 6f6c3f9bccb..76cc9327530 100644 --- a/src/Interpreters/ActionsDAG.h +++ b/src/Interpreters/ActionsDAG.h @@ -501,7 +501,6 @@ public: const ActionsDAG::Node * find(const String & name); private: - //const ActionsDAG & actions; NameToNodeIndex index; }; diff --git a/src/Interpreters/ActionsVisitor.h b/src/Interpreters/ActionsVisitor.h index 496d9b9b587..5b638fc14c8 100644 --- a/src/Interpreters/ActionsVisitor.h +++ b/src/Interpreters/ActionsVisitor.h @@ -18,12 +18,6 @@ namespace DB class ASTExpressionList; class ASTFunction; -class ExpressionActions; -using ExpressionActionsPtr = std::shared_ptr; - -class ActionsDAG; -using ActionsDAGPtr = std::unique_ptr; - class IFunctionOverloadResolver; using FunctionOverloadResolverPtr = std::shared_ptr; @@ -32,7 +26,7 @@ FutureSetPtr makeExplicitSet( const ASTFunction * node, const ActionsDAG & actions, ContextPtr context, PreparedSets & prepared_sets); /** For ActionsVisitor - * A stack of ExpressionActions corresponding to nested lambda expressions. + * A stack of ActionsDAG corresponding to nested lambda expressions. * The new action should be added to the highest possible level. 
* For example, in the expression "select arrayMap(x -> x + column1 * column2, array1)" * calculation of the product must be done outside the lambda expression (it does not depend on x), diff --git a/src/Interpreters/ExpressionAnalyzer.h b/src/Interpreters/ExpressionAnalyzer.h index 737d36eb504..0c00247df85 100644 --- a/src/Interpreters/ExpressionAnalyzer.h +++ b/src/Interpreters/ExpressionAnalyzer.h @@ -38,9 +38,6 @@ using StorageMetadataPtr = std::shared_ptr; class ArrayJoinAction; using ArrayJoinActionPtr = std::shared_ptr; -class ActionsDAG; -using ActionsDAGPtr = std::unique_ptr; - /// Create columns in block or return false if not possible bool sanitizeBlock(Block & block, bool throw_if_cannot_create_column = false); diff --git a/src/Interpreters/WindowDescription.h b/src/Interpreters/WindowDescription.h index 17bfe619c30..d51d9ca94d8 100644 --- a/src/Interpreters/WindowDescription.h +++ b/src/Interpreters/WindowDescription.h @@ -14,7 +14,6 @@ namespace DB class ASTFunction; class ActionsDAG; -using ActionsDAGPtr = std::unique_ptr; struct WindowFunctionDescription { diff --git a/src/Interpreters/addMissingDefaults.h b/src/Interpreters/addMissingDefaults.h index 5299bae9745..551583a0006 100644 --- a/src/Interpreters/addMissingDefaults.h +++ b/src/Interpreters/addMissingDefaults.h @@ -2,11 +2,6 @@ #include -#include -#include -#include - - namespace DB { @@ -15,7 +10,6 @@ class NamesAndTypesList; class ColumnsDescription; class ActionsDAG; -using ActionsDAGPtr = std::unique_ptr; /** Adds three types of columns into block * 1. Columns, that are missed inside request, but present in table without defaults (missed columns) From 4ebb189691c2d553887e9d49b52f9e0a45eaf004 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 18 Jul 2024 17:59:59 +0000 Subject: [PATCH 075/661] Better. --- src/Planner/CollectTableExpressionData.cpp | 8 ++++---- src/Planner/PlannerJoinTree.cpp | 12 ++++++------ src/Planner/TableExpressionData.h | 6 +++--- src/Storages/SelectQueryInfo.cpp | 2 +- 4 files changed, 14 insertions(+), 14 deletions(-) diff --git a/src/Planner/CollectTableExpressionData.cpp b/src/Planner/CollectTableExpressionData.cpp index 1d85476636c..2fe62aa9be0 100644 --- a/src/Planner/CollectTableExpressionData.cpp +++ b/src/Planner/CollectTableExpressionData.cpp @@ -88,15 +88,15 @@ public: auto column_identifier = planner_context->getGlobalPlannerContext()->createColumnIdentifier(node); - ActionsDAGPtr alias_column_actions_dag = std::make_unique(); + ActionsDAG alias_column_actions_dag; PlannerActionsVisitor actions_visitor(planner_context, false); - auto outputs = actions_visitor.visit(*alias_column_actions_dag, column_node->getExpression()); + auto outputs = actions_visitor.visit(alias_column_actions_dag, column_node->getExpression()); if (outputs.size() != 1) throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected single output in actions dag for alias column {}. 
Actual {}", column_node->dumpTree(), outputs.size()); const auto & column_name = column_node->getColumnName(); - const auto & alias_node = alias_column_actions_dag->addAlias(*outputs[0], column_name); - alias_column_actions_dag->addOrReplaceInOutputs(alias_node); + const auto & alias_node = alias_column_actions_dag.addAlias(*outputs[0], column_name); + alias_column_actions_dag.addOrReplaceInOutputs(alias_node); table_expression_data.addAliasColumn(column_node->getColumn(), column_identifier, std::move(alias_column_actions_dag), select_added_columns); } diff --git a/src/Planner/PlannerJoinTree.cpp b/src/Planner/PlannerJoinTree.cpp index 10b5a761d58..048bfa4b577 100644 --- a/src/Planner/PlannerJoinTree.cpp +++ b/src/Planner/PlannerJoinTree.cpp @@ -591,16 +591,16 @@ UInt64 mainQueryNodeBlockSizeByLimit(const SelectQueryInfo & select_query_info) } std::unique_ptr createComputeAliasColumnsStep( - const std::unordered_map & alias_column_expressions, const DataStream & current_data_stream) + std::unordered_map & alias_column_expressions, const DataStream & current_data_stream) { ActionsDAG merged_alias_columns_actions_dag(current_data_stream.header.getColumnsWithTypeAndName()); ActionsDAG::NodeRawConstPtrs action_dag_outputs = merged_alias_columns_actions_dag.getInputs(); - for (const auto & [column_name, alias_column_actions_dag] : alias_column_expressions) + for (auto & [column_name, alias_column_actions_dag] : alias_column_expressions) { - const auto & current_outputs = alias_column_actions_dag->getOutputs(); + const auto & current_outputs = alias_column_actions_dag.getOutputs(); action_dag_outputs.insert(action_dag_outputs.end(), current_outputs.begin(), current_outputs.end()); - merged_alias_columns_actions_dag.mergeNodes(std::move(*alias_column_actions_dag)); + merged_alias_columns_actions_dag.mergeNodes(std::move(alias_column_actions_dag)); } for (const auto * output_node : action_dag_outputs) @@ -996,7 +996,7 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres } } - const auto & alias_column_expressions = table_expression_data.getAliasColumnExpressions(); + auto & alias_column_expressions = table_expression_data.getAliasColumnExpressions(); if (!alias_column_expressions.empty() && query_plan.isInitialized() && from_stage == QueryProcessingStage::FetchColumns) { auto alias_column_step = createComputeAliasColumnsStep(alias_column_expressions, query_plan.getCurrentDataStream()); @@ -1085,7 +1085,7 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres query_plan = std::move(subquery_planner).extractQueryPlan(); } - const auto & alias_column_expressions = table_expression_data.getAliasColumnExpressions(); + auto & alias_column_expressions = table_expression_data.getAliasColumnExpressions(); if (!alias_column_expressions.empty() && query_plan.isInitialized() && from_stage == QueryProcessingStage::FetchColumns) { auto alias_column_step = createComputeAliasColumnsStep(alias_column_expressions, query_plan.getCurrentDataStream()); diff --git a/src/Planner/TableExpressionData.h b/src/Planner/TableExpressionData.h index 1d04fac3dc3..72412a869e4 100644 --- a/src/Planner/TableExpressionData.h +++ b/src/Planner/TableExpressionData.h @@ -73,7 +73,7 @@ public: } /// Add alias column - void addAliasColumn(const NameAndTypePair & column, const ColumnIdentifier & column_identifier, ActionsDAGPtr actions_dag, bool is_selected_column = true) + void addAliasColumn(const NameAndTypePair & column, const ColumnIdentifier & column_identifier, ActionsDAG 
actions_dag, bool is_selected_column = true) { alias_column_expressions.emplace(column.name, std::move(actions_dag)); addColumnImpl(column, column_identifier, is_selected_column); @@ -94,7 +94,7 @@ public: } /// Get ALIAS columns names mapped to expressions - const std::unordered_map & getAliasColumnExpressions() const + std::unordered_map & getAliasColumnExpressions() { return alias_column_expressions; } @@ -277,7 +277,7 @@ private: NameSet selected_column_names_set; /// Expression to calculate ALIAS columns - std::unordered_map alias_column_expressions; + std::unordered_map alias_column_expressions; /// Valid for table, table function, array join, query, union nodes ColumnNameToColumn column_name_to_column; diff --git a/src/Storages/SelectQueryInfo.cpp b/src/Storages/SelectQueryInfo.cpp index d59ccf0dfaf..c9c96ed5837 100644 --- a/src/Storages/SelectQueryInfo.cpp +++ b/src/Storages/SelectQueryInfo.cpp @@ -18,7 +18,7 @@ std::unordered_map SelectQueryInfo::buildNod std::unordered_map node_name_to_input_node_column; if (planner_context) { - const auto & table_expression_data = planner_context->getTableExpressionDataOrThrow(table_expression); + auto & table_expression_data = planner_context->getTableExpressionDataOrThrow(table_expression); const auto & alias_column_expressions = table_expression_data.getAliasColumnExpressions(); for (const auto & [column_identifier, column_name] : table_expression_data.getColumnIdentifierToColumnName()) { From e806123856f5ded0f2e92f4f4b42c38132276c15 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Thu, 18 Jul 2024 20:30:56 +0000 Subject: [PATCH 076/661] Fix non x86 build --- src/QueryPipeline/RemoteQueryExecutor.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/QueryPipeline/RemoteQueryExecutor.cpp b/src/QueryPipeline/RemoteQueryExecutor.cpp index 87f634b8334..d7edbc9ed35 100644 --- a/src/QueryPipeline/RemoteQueryExecutor.cpp +++ b/src/QueryPipeline/RemoteQueryExecutor.cpp @@ -469,7 +469,7 @@ RemoteQueryExecutor::ReadResult RemoteQueryExecutor::read() return restartQueryWithoutDuplicatedUUIDs(); } -RemoteQueryExecutor::ReadResult RemoteQueryExecutor::readAsync(bool check_packet_type_only) +RemoteQueryExecutor::ReadResult RemoteQueryExecutor::readAsync([[maybe_unused]] bool check_packet_type_only) { #if defined(OS_LINUX) if (!read_context || (resent_query && recreate_read_context)) From 4ef9cb6d7aa32aeb56c26bfa6ecad94beacba540 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 18 Jul 2024 23:13:32 +0200 Subject: [PATCH 077/661] Fix style --- src/IO/ReadBufferFromFileBase.cpp | 5 ----- src/IO/ReadWriteBufferFromHTTP.cpp | 1 - 2 files changed, 6 deletions(-) diff --git a/src/IO/ReadBufferFromFileBase.cpp b/src/IO/ReadBufferFromFileBase.cpp index d42b12ba49b..b7a1438cff8 100644 --- a/src/IO/ReadBufferFromFileBase.cpp +++ b/src/IO/ReadBufferFromFileBase.cpp @@ -5,11 +5,6 @@ namespace DB { -namespace ErrorCodes -{ - extern const int UNKNOWN_FILE_SIZE; -} - ReadBufferFromFileBase::ReadBufferFromFileBase() : BufferWithOwnMemory(0) { } diff --git a/src/IO/ReadWriteBufferFromHTTP.cpp b/src/IO/ReadWriteBufferFromHTTP.cpp index 2a62b11aa44..4d27a78c8dc 100644 --- a/src/IO/ReadWriteBufferFromHTTP.cpp +++ b/src/IO/ReadWriteBufferFromHTTP.cpp @@ -72,7 +72,6 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; extern const int CANNOT_SEEK_THROUGH_FILE; extern const int SEEK_POSITION_OUT_OF_BOUND; - extern const int UNKNOWN_FILE_SIZE; } std::unique_ptr ReadWriteBufferFromHTTP::CallResult::transformToReadBuffer(size_t buf_size) && From 
0bf9346b07dc6fb07180a4221477512ba4eae024 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 19 Jul 2024 00:08:36 +0200 Subject: [PATCH 078/661] Update 03206_no_exceptions_clickhouse_local.sh --- .../queries/0_stateless/03206_no_exceptions_clickhouse_local.sh | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/queries/0_stateless/03206_no_exceptions_clickhouse_local.sh b/tests/queries/0_stateless/03206_no_exceptions_clickhouse_local.sh index 86839a228dc..00efd1f4591 100755 --- a/tests/queries/0_stateless/03206_no_exceptions_clickhouse_local.sh +++ b/tests/queries/0_stateless/03206_no_exceptions_clickhouse_local.sh @@ -1,6 +1,4 @@ #!/usr/bin/env bash -# Tags: no-fasttest -# Tag no-fasttest: In fasttest, ENABLE_LIBRARIES=0, so the grpc library is not built CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh From 7d9b7cc79611751adc6d22aa47c5e179228a2840 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 15 Jul 2024 02:54:29 +0200 Subject: [PATCH 079/661] Introduce ASTDataType --- src/DataTypes/DataTypeFactory.cpp | 14 ++--- src/Databases/DatabasesCommon.cpp | 2 +- .../PostgreSQL/DatabasePostgreSQL.cpp | 15 +++-- .../InterpreterShowCreateQuery.cpp | 2 - .../MySQL/InterpretersMySQLDDLQuery.cpp | 28 ++++----- .../formatWithPossiblyHidingSecrets.h | 8 ++- src/Parsers/ASTColumnDeclaration.cpp | 4 -- src/Parsers/ASTDataType.cpp | 57 +++++++++++++++++++ src/Parsers/ASTDataType.h | 36 ++++++++++++ src/Parsers/ASTFunction.cpp | 4 -- src/Parsers/ASTFunction.h | 2 +- src/Parsers/IAST.h | 15 +++-- src/Parsers/ParserCreateQuery.cpp | 7 ++- src/Parsers/ParserCreateQuery.h | 8 +-- src/Parsers/ParserDataType.cpp | 14 ++--- .../StorageMaterializedPostgreSQL.cpp | 13 ++--- src/Storages/WindowView/StorageWindowView.cpp | 3 +- 17 files changed, 156 insertions(+), 76 deletions(-) create mode 100644 src/Parsers/ASTDataType.cpp create mode 100644 src/Parsers/ASTDataType.h diff --git a/src/DataTypes/DataTypeFactory.cpp b/src/DataTypes/DataTypeFactory.cpp index af37cde2846..45552e506cd 100644 --- a/src/DataTypes/DataTypeFactory.cpp +++ b/src/DataTypes/DataTypeFactory.cpp @@ -2,7 +2,7 @@ #include #include #include -#include +#include #include #include #include @@ -83,15 +83,9 @@ DataTypePtr DataTypeFactory::tryGet(const ASTPtr & ast) const template DataTypePtr DataTypeFactory::getImpl(const ASTPtr & ast) const { - if (const auto * func = ast->as()) + if (const auto * type = ast->as()) { - if (func->parameters) - { - if constexpr (nullptr_on_error) - return nullptr; - throw Exception(ErrorCodes::ILLEGAL_SYNTAX_FOR_DATA_TYPE, "Data type cannot have multiple parenthesized parameters."); - } - return getImpl(func->name, func->arguments); + return getImpl(type->name, type->arguments); } if (const auto * ident = ast->as()) @@ -107,7 +101,7 @@ DataTypePtr DataTypeFactory::getImpl(const ASTPtr & ast) const if constexpr (nullptr_on_error) return nullptr; - throw Exception(ErrorCodes::UNEXPECTED_AST_STRUCTURE, "Unexpected AST element for data type."); + throw Exception(ErrorCodes::UNEXPECTED_AST_STRUCTURE, "Unexpected AST element for data type: {}.", ast->getID()); } DataTypePtr DataTypeFactory::get(const String & family_name_param, const ASTPtr & parameters) const diff --git a/src/Databases/DatabasesCommon.cpp b/src/Databases/DatabasesCommon.cpp index fe0baf30e57..cacba581745 100644 --- a/src/Databases/DatabasesCommon.cpp +++ b/src/Databases/DatabasesCommon.cpp @@ -149,7 +149,7 @@ ASTPtr getCreateQueryFromStorage(const StoragePtr & storage, const ASTPtr & ast_ columns 
= metadata_ptr->columns.getAll(); for (const auto & column_name_and_type: columns) { - const auto & ast_column_declaration = std::make_shared(); + const auto ast_column_declaration = std::make_shared(); ast_column_declaration->name = column_name_and_type.name; /// parser typename { diff --git a/src/Databases/PostgreSQL/DatabasePostgreSQL.cpp b/src/Databases/PostgreSQL/DatabasePostgreSQL.cpp index a846e23cd4f..032fc33ea16 100644 --- a/src/Databases/PostgreSQL/DatabasePostgreSQL.cpp +++ b/src/Databases/PostgreSQL/DatabasePostgreSQL.cpp @@ -12,9 +12,9 @@ #include #include #include +#include #include #include -#include #include #include #include @@ -25,6 +25,7 @@ #include #include + namespace fs = std::filesystem; namespace DB @@ -432,7 +433,7 @@ ASTPtr DatabasePostgreSQL::getCreateTableQueryImpl(const String & table_name, Co auto metadata_snapshot = storage->getInMemoryMetadataPtr(); for (const auto & column_type_and_name : metadata_snapshot->getColumns().getOrdinary()) { - const auto & column_declaration = std::make_shared(); + const auto column_declaration = std::make_shared(); column_declaration->name = column_type_and_name.name; column_declaration->type = getColumnDeclaration(column_type_and_name.type); columns_expression_list->children.emplace_back(column_declaration); @@ -470,17 +471,15 @@ ASTPtr DatabasePostgreSQL::getColumnDeclaration(const DataTypePtr & data_type) c WhichDataType which(data_type); if (which.isNullable()) - return makeASTFunction("Nullable", getColumnDeclaration(typeid_cast(data_type.get())->getNestedType())); + return makeASTDataType("Nullable", getColumnDeclaration(typeid_cast(data_type.get())->getNestedType())); if (which.isArray()) - return makeASTFunction("Array", getColumnDeclaration(typeid_cast(data_type.get())->getNestedType())); + return makeASTDataType("Array", getColumnDeclaration(typeid_cast(data_type.get())->getNestedType())); if (which.isDateTime64()) - { - return makeASTFunction("DateTime64", std::make_shared(static_cast(6))); - } + return makeASTDataType("DateTime64", std::make_shared(static_cast(6))); - return std::make_shared(data_type->getName()); + return makeASTDataType(data_type->getName()); } void registerDatabasePostgreSQL(DatabaseFactory & factory) diff --git a/src/Interpreters/InterpreterShowCreateQuery.cpp b/src/Interpreters/InterpreterShowCreateQuery.cpp index 0fca7b64d5a..ca5b7a3b5c1 100644 --- a/src/Interpreters/InterpreterShowCreateQuery.cpp +++ b/src/Interpreters/InterpreterShowCreateQuery.cpp @@ -1,9 +1,7 @@ #include #include -#include #include #include -#include #include #include #include diff --git a/src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp b/src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp index 4821d607d0e..f73965cfcc8 100644 --- a/src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp +++ b/src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -29,6 +30,7 @@ #include #include + namespace DB { @@ -95,22 +97,22 @@ NamesAndTypesList getColumnsList(const ASTExpressionList * columns_definition) } ASTPtr data_type = declare_column->data_type; - auto * data_type_function = data_type->as(); + auto * data_type_node = data_type->as(); - if (data_type_function) + if (data_type_node) { - String type_name_upper = Poco::toUpper(data_type_function->name); + String type_name_upper = Poco::toUpper(data_type_node->name); if (is_unsigned) { /// For example(in MySQL): CREATE TABLE test(column_name INT NOT NULL ... 
UNSIGNED) if (type_name_upper.find("INT") != String::npos && !endsWith(type_name_upper, "SIGNED") && !endsWith(type_name_upper, "UNSIGNED")) - data_type_function->name = type_name_upper + " UNSIGNED"; + data_type_node->name = type_name_upper + " UNSIGNED"; } if (type_name_upper == "SET") - data_type_function->arguments.reset(); + data_type_node->arguments.reset(); /// Transforms MySQL ENUM's list of strings to ClickHouse string-integer pairs /// For example ENUM('a', 'b', 'c') -> ENUM('a'=1, 'b'=2, 'c'=3) @@ -119,7 +121,7 @@ NamesAndTypesList getColumnsList(const ASTExpressionList * columns_definition) if (type_name_upper.find("ENUM") != String::npos) { UInt16 i = 0; - for (ASTPtr & child : data_type_function->arguments->children) + for (ASTPtr & child : data_type_node->arguments->children) { auto new_child = std::make_shared(); new_child->name = "equals"; @@ -133,10 +135,10 @@ NamesAndTypesList getColumnsList(const ASTExpressionList * columns_definition) } if (type_name_upper == "DATE") - data_type_function->name = "Date32"; + data_type_node->name = "Date32"; } if (is_nullable) - data_type = makeASTFunction("Nullable", data_type); + data_type = makeASTDataType("Nullable", data_type); columns_name_and_type.emplace_back(declare_column->name, DataTypeFactory::instance().get(data_type)); } @@ -175,7 +177,7 @@ static ColumnsDescription createColumnsDescription(const NamesAndTypesList & col return columns_description; } -static NamesAndTypesList getNames(const ASTFunction & expr, ContextPtr context, const NamesAndTypesList & columns) +static NamesAndTypesList getNames(const ASTDataType & expr, ContextPtr context, const NamesAndTypesList & columns) { if (expr.arguments->children.empty()) return NamesAndTypesList{}; @@ -219,9 +221,9 @@ static std::tuplechildren.empty()) { @@ -482,7 +484,7 @@ ASTs InterpreterCreateImpl::getRewrittenQueries( { auto column_declaration = std::make_shared(); column_declaration->name = name; - column_declaration->type = makeASTFunction(type); + column_declaration->type = makeASTDataType(type); column_declaration->default_specifier = "MATERIALIZED"; column_declaration->default_expression = std::make_shared(default_value); column_declaration->children.emplace_back(column_declaration->type); diff --git a/src/Interpreters/formatWithPossiblyHidingSecrets.h b/src/Interpreters/formatWithPossiblyHidingSecrets.h index 039bcbc2bca..ea8c295b169 100644 --- a/src/Interpreters/formatWithPossiblyHidingSecrets.h +++ b/src/Interpreters/formatWithPossiblyHidingSecrets.h @@ -1,11 +1,14 @@ #pragma once -#include "Access/ContextAccess.h" -#include "Interpreters/Context.h" + +#include +#include + #include namespace DB { + struct SecretHidingFormatSettings { // We can't store const Context& as there's a dangerous usage {.ctx = *getContext()} @@ -24,4 +27,5 @@ inline String format(const SecretHidingFormatSettings & settings) return settings.query.formatWithPossiblyHidingSensitiveData(settings.max_length, settings.one_line, show_secrets); } + } diff --git a/src/Parsers/ASTColumnDeclaration.cpp b/src/Parsers/ASTColumnDeclaration.cpp index 4a8a3d2967d..c96499095d5 100644 --- a/src/Parsers/ASTColumnDeclaration.cpp +++ b/src/Parsers/ASTColumnDeclaration.cpp @@ -1,8 +1,6 @@ #include #include #include -#include -#include namespace DB @@ -15,8 +13,6 @@ ASTPtr ASTColumnDeclaration::clone() const if (type) { - // Type may be an ASTFunction (e.g. `create table t (a Decimal(9,0))`), - // so we have to clone it properly as well. 
res->type = type->clone(); res->children.push_back(res->type); } diff --git a/src/Parsers/ASTDataType.cpp b/src/Parsers/ASTDataType.cpp new file mode 100644 index 00000000000..3c17ae8c380 --- /dev/null +++ b/src/Parsers/ASTDataType.cpp @@ -0,0 +1,57 @@ +#include +#include +#include + + +namespace DB +{ + +String ASTDataType::getID(char delim) const +{ + return "DataType" + (delim + name); +} + +ASTPtr ASTDataType::clone() const +{ + auto res = std::make_shared(*this); + res->children.clear(); + + if (arguments) + { + res->arguments = arguments->clone(); + res->children.push_back(res->arguments); + } + + return res; +} + +void ASTDataType::updateTreeHashImpl(SipHash & hash_state, bool) const +{ + hash_state.update(name.size()); + hash_state.update(name); + /// Children are hashed automatically. +} + +void ASTDataType::formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const +{ + settings.ostr << (settings.hilite ? hilite_function : "") << name; + + if (arguments && !arguments->children.empty()) + { + settings.ostr << '(' << (settings.hilite ? hilite_none : ""); + + for (size_t i = 0, size = arguments->children.size(); i < size; ++i) + { + if (i != 0) + settings.ostr << ", "; + + arguments->children[i]->formatImpl(settings, state, frame); + } + + settings.ostr << (settings.hilite ? hilite_function : "") << ')'; + } + + settings.ostr << (settings.hilite ? hilite_none : ""); +} + +} diff --git a/src/Parsers/ASTDataType.h b/src/Parsers/ASTDataType.h new file mode 100644 index 00000000000..c8f3c6e2e9d --- /dev/null +++ b/src/Parsers/ASTDataType.h @@ -0,0 +1,36 @@ +#pragma once + +#include + + +namespace DB +{ + +/// AST for data types, e.g. UInt8 or Tuple(x UInt8, y Enum(a = 1)) +class ASTDataType : public IAST +{ +public: + String name; + ASTPtr arguments; + + String getID(char delim) const override; + ASTPtr clone() const override; + void updateTreeHashImpl(SipHash & hash_state, bool ignore_aliases) const override; + void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override; +}; + +template +std::shared_ptr makeASTDataType(const String & name, Args &&... args) +{ + auto function = std::make_shared(); + + function->name = name; + function->arguments = std::make_shared(); + function->children.push_back(function->arguments); + + function->arguments->children = { std::forward(args)... }; + + return function; +} + +} diff --git a/src/Parsers/ASTFunction.cpp b/src/Parsers/ASTFunction.cpp index f39229d7566..7d42b6d1e9c 100644 --- a/src/Parsers/ASTFunction.cpp +++ b/src/Parsers/ASTFunction.cpp @@ -7,7 +7,6 @@ #include #include #include -#include #include #include #include @@ -19,9 +18,6 @@ #include #include #include -#include - -#include using namespace std::literals; diff --git a/src/Parsers/ASTFunction.h b/src/Parsers/ASTFunction.h index 3a94691f25d..be2b6beae54 100644 --- a/src/Parsers/ASTFunction.h +++ b/src/Parsers/ASTFunction.h @@ -46,7 +46,7 @@ public: NullsAction nulls_action = NullsAction::EMPTY; - /// do not print empty parentheses if there are no args - compatibility with new AST for data types and engine names. + /// do not print empty parentheses if there are no args - compatibility with engine names. bool no_empty_args = false; /// Specifies where this function-like expression is used. 
diff --git a/src/Parsers/IAST.h b/src/Parsers/IAST.h index ee70fed0f07..e7e2ff2ec4a 100644 --- a/src/Parsers/IAST.h +++ b/src/Parsers/IAST.h @@ -271,16 +271,15 @@ public: throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown element in AST: {}", getID()); } - // Secrets are displayed regarding show_secrets, then SensitiveDataMasker is applied. - // You can use Interpreters/formatWithPossiblyHidingSecrets.h for convenience. + /// Secrets are displayed regarding show_secrets, then SensitiveDataMasker is applied. + /// You can use Interpreters/formatWithPossiblyHidingSecrets.h for convenience. String formatWithPossiblyHidingSensitiveData(size_t max_length, bool one_line, bool show_secrets) const; - /* - * formatForLogging and formatForErrorMessage always hide secrets. This inconsistent - * behaviour is due to the fact such functions are called from Client which knows nothing about - * access rights and settings. Moreover, the only use case for displaying secrets are backups, - * and backup tools use only direct input and ignore logs and error messages. - */ + /** formatForLogging and formatForErrorMessage always hide secrets. This inconsistent + * behaviour is due to the fact such functions are called from Client which knows nothing about + * access rights and settings. Moreover, the only use case for displaying secrets are backups, + * and backup tools use only direct input and ignore logs and error messages. + */ String formatForLogging(size_t max_length = 0) const { return formatWithPossiblyHidingSensitiveData(max_length, true, false); diff --git a/src/Parsers/ParserCreateQuery.cpp b/src/Parsers/ParserCreateQuery.cpp index 014dc7bd3bf..fa232954cd6 100644 --- a/src/Parsers/ParserCreateQuery.cpp +++ b/src/Parsers/ParserCreateQuery.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -75,9 +76,9 @@ bool ParserNestedTable::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (!close.ignore(pos, expected)) return false; - auto func = std::make_shared(); + auto func = std::make_shared(); tryGetIdentifierNameInto(name, func->name); - // FIXME(ilezhankin): func->no_empty_args = true; ? + func->arguments = columns; func->children.push_back(columns); node = func; @@ -749,7 +750,7 @@ bool ParserCreateTableQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expe auto * table_id = table->as(); - // Shortcut for ATTACH a previously detached table + /// A shortcut for ATTACH a previously detached table. 
bool short_attach = attach && !from_path; if (short_attach && (!pos.isValid() || pos.get().type == TokenType::Semicolon)) { diff --git a/src/Parsers/ParserCreateQuery.h b/src/Parsers/ParserCreateQuery.h index bb37491a366..7bd1d1bf588 100644 --- a/src/Parsers/ParserCreateQuery.h +++ b/src/Parsers/ParserCreateQuery.h @@ -101,17 +101,15 @@ class IParserColumnDeclaration : public IParserBase { public: explicit IParserColumnDeclaration(bool require_type_ = true, bool allow_null_modifiers_ = false, bool check_keywords_after_name_ = false) - : require_type(require_type_) - , allow_null_modifiers(allow_null_modifiers_) - , check_keywords_after_name(check_keywords_after_name_) + : require_type(require_type_) + , allow_null_modifiers(allow_null_modifiers_) + , check_keywords_after_name(check_keywords_after_name_) { } void enableCheckTypeKeyword() { check_type_keyword = true; } protected: - using ASTDeclarePtr = std::shared_ptr; - const char * getName() const override{ return "column declaration"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; diff --git a/src/Parsers/ParserDataType.cpp b/src/Parsers/ParserDataType.cpp index ad33c7e4558..63800819899 100644 --- a/src/Parsers/ParserDataType.cpp +++ b/src/Parsers/ParserDataType.cpp @@ -1,6 +1,7 @@ #include #include +#include #include #include #include @@ -198,13 +199,12 @@ bool ParserDataType::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) } } - auto function_node = std::make_shared(); - function_node->name = type_name; - function_node->no_empty_args = true; + auto data_type_node = std::make_shared(); + data_type_node->name = type_name; if (pos->type != TokenType::OpeningRoundBracket) { - node = function_node; + node = data_type_node; return true; } ++pos; @@ -222,10 +222,10 @@ bool ParserDataType::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) return false; ++pos; - function_node->arguments = expr_list_args; - function_node->children.push_back(function_node->arguments); + data_type_node->arguments = expr_list_args; + data_type_node->children.push_back(data_type_node->arguments); - node = function_node; + node = data_type_node; return true; } diff --git a/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp b/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp index a904b29e12f..f7dde509d4e 100644 --- a/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp +++ b/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp @@ -7,10 +7,8 @@ #include #include -#include #include -#include #include #include #include @@ -22,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -295,7 +294,7 @@ std::shared_ptr StorageMaterializedPostgreSQL::getMaterial auto column_declaration = std::make_shared(); column_declaration->name = std::move(name); - column_declaration->type = makeASTFunction(type); + column_declaration->type = makeASTDataType(type); column_declaration->default_specifier = "MATERIALIZED"; column_declaration->default_expression = std::make_shared(default_value); @@ -312,17 +311,17 @@ ASTPtr StorageMaterializedPostgreSQL::getColumnDeclaration(const DataTypePtr & d WhichDataType which(data_type); if (which.isNullable()) - return makeASTFunction("Nullable", getColumnDeclaration(typeid_cast(data_type.get())->getNestedType())); + return makeASTDataType("Nullable", getColumnDeclaration(typeid_cast(data_type.get())->getNestedType())); if (which.isArray()) - return makeASTFunction("Array", getColumnDeclaration(typeid_cast(data_type.get())->getNestedType())); + return 
makeASTDataType("Array", getColumnDeclaration(typeid_cast(data_type.get())->getNestedType())); /// getName() for decimal returns 'Decimal(precision, scale)', will get an error with it if (which.isDecimal()) { auto make_decimal_expression = [&](std::string type_name) { - auto ast_expression = std::make_shared(); + auto ast_expression = std::make_shared(); ast_expression->name = type_name; ast_expression->arguments = std::make_shared(); @@ -354,7 +353,7 @@ ASTPtr StorageMaterializedPostgreSQL::getColumnDeclaration(const DataTypePtr & d return ast_expression; } - return std::make_shared(data_type->getName()); + return makeASTDataType(data_type->getName()); } diff --git a/src/Storages/WindowView/StorageWindowView.cpp b/src/Storages/WindowView/StorageWindowView.cpp index e15da0074d5..65a30b18e96 100644 --- a/src/Storages/WindowView/StorageWindowView.cpp +++ b/src/Storages/WindowView/StorageWindowView.cpp @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -805,7 +806,7 @@ ASTPtr StorageWindowView::getInnerTableCreateQuery(const ASTPtr & inner_query, c { auto column_window = std::make_shared(); column_window->name = window_id_name; - column_window->type = std::make_shared("UInt32"); + column_window->type = makeASTDataType("UInt32"); columns_list->children.push_back(column_window); } From 2860aa514d0d8837bd72a87390686b20e05ecae7 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 15 Jul 2024 03:50:38 +0200 Subject: [PATCH 080/661] Fix style --- src/DataTypes/DataTypeFactory.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/DataTypes/DataTypeFactory.cpp b/src/DataTypes/DataTypeFactory.cpp index 45552e506cd..db6e1738d22 100644 --- a/src/DataTypes/DataTypeFactory.cpp +++ b/src/DataTypes/DataTypeFactory.cpp @@ -22,7 +22,6 @@ namespace ErrorCodes { extern const int LOGICAL_ERROR; extern const int UNKNOWN_TYPE; - extern const int ILLEGAL_SYNTAX_FOR_DATA_TYPE; extern const int UNEXPECTED_AST_STRUCTURE; extern const int DATA_TYPE_CANNOT_HAVE_ARGUMENTS; } From 985f398925266c5867cd25fcdf655a5a306928fd Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 15 Jul 2024 05:33:44 +0200 Subject: [PATCH 081/661] Fix error --- src/Client/ClientBase.cpp | 1 - src/DataTypes/DataTypeAggregateFunction.cpp | 4 +-- src/Parsers/ParserDataType.cpp | 36 ++++++++++++++------- src/TableFunctions/ITableFunction.cpp | 1 - 4 files changed, 26 insertions(+), 16 deletions(-) diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 13dce05cabc..95d2dff54e6 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -70,7 +70,6 @@ #include #include -#include #include #include #include diff --git a/src/DataTypes/DataTypeAggregateFunction.cpp b/src/DataTypes/DataTypeAggregateFunction.cpp index ef7d86d2a81..09175617bf1 100644 --- a/src/DataTypes/DataTypeAggregateFunction.cpp +++ b/src/DataTypes/DataTypeAggregateFunction.cpp @@ -257,8 +257,8 @@ static DataTypePtr create(const ASTPtr & arguments) } else throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Unexpected AST element passed as aggregate function name for data type AggregateFunction. " - "Must be identifier or function."); + "Unexpected AST element {} passed as aggregate function name for data type AggregateFunction. 
" + "Must be identifier or function", data_type_ast->getID()); for (size_t i = argument_types_start_idx; i < arguments->children.size(); ++i) argument_types.push_back(DataTypeFactory::instance().get(arguments->children[i])); diff --git a/src/Parsers/ParserDataType.cpp b/src/Parsers/ParserDataType.cpp index 63800819899..78b5aaa93a6 100644 --- a/src/Parsers/ParserDataType.cpp +++ b/src/Parsers/ParserDataType.cpp @@ -70,20 +70,32 @@ private: DynamicArgumentsParser parser; return parser.parse(pos, node, expected); } - - ParserNestedTable nested_parser; - ParserDataType data_type_parser; - ParserAllCollectionsOfLiterals literal_parser(false); - - const char * operators[] = {"=", "equals", nullptr}; - ParserLeftAssociativeBinaryOperatorList enum_parser(operators, std::make_unique()); - - if (pos->type == TokenType::BareWord && std::string_view(pos->begin, pos->size()) == "Nested") + else if (type_name == "Nested") + { + ParserNestedTable nested_parser; return nested_parser.parse(pos, node, expected); + } + else if (type_name == "AggregateFunction") + { + ParserFunction function_parser; + ParserIdentifier identifier_parser; + ParserAllCollectionsOfLiterals literal_parser(false); + return literal_parser.parse(pos, node, expected) + || identifier_parser.parse(pos, node, expected) + || function_parser.parse(pos, node, expected); + } + else + { + ParserDataType data_type_parser; + ParserAllCollectionsOfLiterals literal_parser(false); - return enum_parser.parse(pos, node, expected) - || literal_parser.parse(pos, node, expected) - || data_type_parser.parse(pos, node, expected); + const char * operators[] = {"=", "equals", nullptr}; + ParserLeftAssociativeBinaryOperatorList enum_parser(operators, std::make_unique()); + + return enum_parser.parse(pos, node, expected) + || literal_parser.parse(pos, node, expected) + || data_type_parser.parse(pos, node, expected); + } } std::string_view type_name; diff --git a/src/TableFunctions/ITableFunction.cpp b/src/TableFunctions/ITableFunction.cpp index e5676c5c25d..916ff7ec022 100644 --- a/src/TableFunctions/ITableFunction.cpp +++ b/src/TableFunctions/ITableFunction.cpp @@ -1,5 +1,4 @@ #include -#include #include #include #include From 15be94ee14a7affe6643dd4c3ac1b104e3c69eeb Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 16 Jul 2024 05:47:27 +0200 Subject: [PATCH 082/661] Update src/Parsers/ASTDataType.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: János Benjamin Antal --- src/Parsers/ASTDataType.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/Parsers/ASTDataType.h b/src/Parsers/ASTDataType.h index c8f3c6e2e9d..abe5cbb8626 100644 --- a/src/Parsers/ASTDataType.h +++ b/src/Parsers/ASTDataType.h @@ -22,13 +22,13 @@ public: template std::shared_ptr makeASTDataType(const String & name, Args &&... args) { - auto function = std::make_shared(); + auto data_type = std::make_shared(); - function->name = name; - function->arguments = std::make_shared(); - function->children.push_back(function->arguments); + data_type->name = name; + data_type->arguments = std::make_shared(); + data_type->children.push_back(function->arguments); - function->arguments->children = { std::forward(args)... }; + data_type->arguments->children = { std::forward(args)... 
}; return function; } From 87fa2c64e96c6bac67275207bd708ac231fa9fb6 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 16 Jul 2024 05:48:00 +0200 Subject: [PATCH 083/661] Apply review comments --- src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp b/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp index f7dde509d4e..e795cd9c6c6 100644 --- a/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp +++ b/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp @@ -345,7 +345,7 @@ ASTPtr StorageMaterializedPostgreSQL::getColumnDeclaration(const DataTypePtr & d if (which.isDateTime64()) { - auto ast_expression = std::make_shared(); + auto ast_expression = std::make_shared(); ast_expression->name = "DateTime64"; ast_expression->arguments = std::make_shared(); From 88dce34be907863b6f3cf6890be87b0d7278a101 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 16 Jul 2024 05:48:31 +0200 Subject: [PATCH 084/661] Fix error --- src/Parsers/ASTDataType.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Parsers/ASTDataType.h b/src/Parsers/ASTDataType.h index abe5cbb8626..d9427c2fd9e 100644 --- a/src/Parsers/ASTDataType.h +++ b/src/Parsers/ASTDataType.h @@ -26,11 +26,11 @@ std::shared_ptr makeASTDataType(const String & name, Args &&... arg data_type->name = name; data_type->arguments = std::make_shared(); - data_type->children.push_back(function->arguments); + data_type->children.push_back(data_type->arguments); data_type->arguments->children = { std::forward(args)... }; - return function; + return data_type; } } From 4d4e0901881221b39cce0e0527f530fe90eb7ad3 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 16 Jul 2024 05:58:06 +0200 Subject: [PATCH 085/661] Fix build --- src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp b/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp index e795cd9c6c6..e7b58841c4e 100644 --- a/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp +++ b/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp @@ -7,6 +7,7 @@ #include #include +#include #include #include From 2832f8c57e37a1fc7d0c91b9ad621785c6d2a5a5 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 17 Jul 2024 04:59:04 +0200 Subject: [PATCH 086/661] Fix a typo --- src/Databases/DatabasesCommon.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Databases/DatabasesCommon.cpp b/src/Databases/DatabasesCommon.cpp index cacba581745..fdbdb610275 100644 --- a/src/Databases/DatabasesCommon.cpp +++ b/src/Databases/DatabasesCommon.cpp @@ -164,7 +164,7 @@ ASTPtr getCreateQueryFromStorage(const StoragePtr & storage, const ASTPtr & ast_ if (!parser.parse(pos, ast_type, expected)) { if (throw_on_error) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot parser metadata of {}.{}", + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot parse metadata of {}.{}", backQuote(table_id.database_name), backQuote(table_id.table_name)); else return nullptr; From 6a7a4df1eca0cda4fd2efdcc8aaf2e8741f4cbcc Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 17 Jul 2024 05:02:26 +0200 Subject: [PATCH 087/661] Fix error --- src/Parsers/ParserDataType.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Parsers/ParserDataType.cpp b/src/Parsers/ParserDataType.cpp index 
78b5aaa93a6..af1a299a887 100644 --- a/src/Parsers/ParserDataType.cpp +++ b/src/Parsers/ParserDataType.cpp @@ -75,7 +75,7 @@ private: ParserNestedTable nested_parser; return nested_parser.parse(pos, node, expected); } - else if (type_name == "AggregateFunction") + else if (type_name == "AggregateFunction" || type_name == "SimpleAggregateFunction") { ParserFunction function_parser; ParserIdentifier identifier_parser; From cc201745620490c7d885a9e45d9f4b92f7492c10 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 17 Jul 2024 05:19:58 +0200 Subject: [PATCH 088/661] Fix bad code: it was catching exceptions --- src/IO/WithFileSize.cpp | 48 +++++++++---------- ...ry_and_native_with_binary_encoded_types.sh | 4 +- 2 files changed, 25 insertions(+), 27 deletions(-) diff --git a/src/IO/WithFileSize.cpp b/src/IO/WithFileSize.cpp index 3660d962c08..8cea12fa200 100644 --- a/src/IO/WithFileSize.cpp +++ b/src/IO/WithFileSize.cpp @@ -14,40 +14,38 @@ namespace ErrorCodes } template -static size_t getFileSize(T & in) +static std::optional tryGetFileSize(T & in) { if (auto * with_file_size = dynamic_cast(&in)) - { return with_file_size->getFileSize(); - } + + return std::nullopt; +} + +template +static size_t getFileSize(T & in) +{ + if (auto maybe_size = tryGetFileSize(in)) + return *maybe_size; throw Exception(ErrorCodes::UNKNOWN_FILE_SIZE, "Cannot find out file size"); } -size_t getFileSizeFromReadBuffer(ReadBuffer & in) -{ - if (auto * delegate = dynamic_cast(&in)) - { - return getFileSize(delegate->getWrappedReadBuffer()); - } - else if (auto * compressed = dynamic_cast(&in)) - { - return getFileSize(compressed->getWrappedReadBuffer()); - } - - return getFileSize(in); -} - std::optional tryGetFileSizeFromReadBuffer(ReadBuffer & in) { - try - { - return getFileSizeFromReadBuffer(in); - } - catch (...) 
- { - return std::nullopt; - } + if (auto * delegate = dynamic_cast(&in)) + return tryGetFileSize(delegate->getWrappedReadBuffer()); + else if (auto * compressed = dynamic_cast(&in)) + return tryGetFileSize(compressed->getWrappedReadBuffer()); + return tryGetFileSize(in); +} + +size_t getFileSizeFromReadBuffer(ReadBuffer & in) +{ + if (auto maybe_size = tryGetFileSizeFromReadBuffer(in)) + return *maybe_size; + + throw Exception(ErrorCodes::UNKNOWN_FILE_SIZE, "Cannot find out file size"); } bool isBufferWithFileSize(const ReadBuffer & in) diff --git a/tests/queries/0_stateless/03173_row_binary_and_native_with_binary_encoded_types.sh b/tests/queries/0_stateless/03173_row_binary_and_native_with_binary_encoded_types.sh index 723b11ad620..0c585d36348 100755 --- a/tests/queries/0_stateless/03173_row_binary_and_native_with_binary_encoded_types.sh +++ b/tests/queries/0_stateless/03173_row_binary_and_native_with_binary_encoded_types.sh @@ -6,8 +6,8 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) function test { - $CLICKHOUSE_LOCAL --allow_experimental_dynamic_type=1 --allow_experimental_variant_type=1 --output_format_binary_encode_types_in_binary_format=1 -q "select $1 as value format RowBinaryWithNamesAndTypes" | $CLICKHOUSE_LOCAL --input-format RowBinaryWithNamesAndTypes --input_format_binary_decode_types_in_binary_format=1 -q "select value, toTypeName(value) from table" - $CLICKHOUSE_LOCAL --allow_experimental_dynamic_type=1 --allow_experimental_variant_type=1 --output_format_native_encode_types_in_binary_format=1 -q "select $1 as value format Native" | $CLICKHOUSE_LOCAL --input-format Native --input_format_native_decode_types_in_binary_format=1 -q "select value, toTypeName(value) from table" + $CLICKHOUSE_LOCAL --allow_experimental_dynamic_type=1 --allow_experimental_variant_type=1 --output_format_binary_encode_types_in_binary_format=1 -q "select $1 as value format RowBinaryWithNamesAndTypes" | $CLICKHOUSE_LOCAL --input-format RowBinaryWithNamesAndTypes --input_format_binary_decode_types_in_binary_format=1 -q "select value, toTypeName(value) from table" + $CLICKHOUSE_LOCAL --allow_experimental_dynamic_type=1 --allow_experimental_variant_type=1 --output_format_native_encode_types_in_binary_format=1 -q "select $1 as value format Native" | $CLICKHOUSE_LOCAL --input-format Native --input_format_native_decode_types_in_binary_format=1 -q "select value, toTypeName(value) from table" } test "materialize(42)::UInt8" From e1a24c9dd6f6320ce02714265e91e83f8dbf43f6 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 17 Jul 2024 05:45:57 +0200 Subject: [PATCH 089/661] Fix error --- src/Parsers/ParserDataType.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/Parsers/ParserDataType.cpp b/src/Parsers/ParserDataType.cpp index af1a299a887..c5da4a32e92 100644 --- a/src/Parsers/ParserDataType.cpp +++ b/src/Parsers/ParserDataType.cpp @@ -3,7 +3,6 @@ #include #include #include -#include #include #include #include @@ -80,9 +79,9 @@ private: ParserFunction function_parser; ParserIdentifier identifier_parser; ParserAllCollectionsOfLiterals literal_parser(false); - return literal_parser.parse(pos, node, expected) - || identifier_parser.parse(pos, node, expected) - || function_parser.parse(pos, node, expected); + return function_parser.parse(pos, node, expected) + || literal_parser.parse(pos, node, expected) + || identifier_parser.parse(pos, node, expected); } else { From bc1146389617f9e9198d0dd25eea89a9206421ba Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 19 Jul 
2024 03:16:23 +0200 Subject: [PATCH 090/661] Fix error --- src/Parsers/ParserDataType.cpp | 142 +++++++++++++++++++-------------- 1 file changed, 84 insertions(+), 58 deletions(-) diff --git a/src/Parsers/ParserDataType.cpp b/src/Parsers/ParserDataType.cpp index c5da4a32e92..2edb0141e12 100644 --- a/src/Parsers/ParserDataType.cpp +++ b/src/Parsers/ParserDataType.cpp @@ -46,60 +46,6 @@ private: } }; -/// Wrapper to allow mixed lists of nested and normal types. -/// Parameters are either: -/// - Nested table elements; -/// - Enum element in form of 'a' = 1; -/// - literal; -/// - Dynamic type arguments; -/// - another data type (or identifier); -class ParserDataTypeArgument : public IParserBase -{ -public: - explicit ParserDataTypeArgument(std::string_view type_name_) : type_name(type_name_) - { - } - -private: - const char * getName() const override { return "data type argument"; } - bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override - { - if (type_name == "Dynamic") - { - DynamicArgumentsParser parser; - return parser.parse(pos, node, expected); - } - else if (type_name == "Nested") - { - ParserNestedTable nested_parser; - return nested_parser.parse(pos, node, expected); - } - else if (type_name == "AggregateFunction" || type_name == "SimpleAggregateFunction") - { - ParserFunction function_parser; - ParserIdentifier identifier_parser; - ParserAllCollectionsOfLiterals literal_parser(false); - return function_parser.parse(pos, node, expected) - || literal_parser.parse(pos, node, expected) - || identifier_parser.parse(pos, node, expected); - } - else - { - ParserDataType data_type_parser; - ParserAllCollectionsOfLiterals literal_parser(false); - - const char * operators[] = {"=", "equals", nullptr}; - ParserLeftAssociativeBinaryOperatorList enum_parser(operators, std::make_unique()); - - return enum_parser.parse(pos, node, expected) - || literal_parser.parse(pos, node, expected) - || data_type_parser.parse(pos, node, expected); - } - } - - std::string_view type_name; -}; - } bool ParserDataType::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) @@ -221,11 +167,91 @@ bool ParserDataType::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) ++pos; /// Parse optional parameters - ParserList args_parser(std::make_unique(type_name), std::make_unique(TokenType::Comma)); - ASTPtr expr_list_args; + ASTPtr expr_list_args = std::make_shared(); + + /// Allow mixed lists of nested and normal types. + /// Parameters are either: + /// - Nested table elements; + /// - Enum element in form of 'a' = 1; + /// - literal; + /// - Dynamic type arguments; + /// - another data type (or identifier); + + size_t arg_num = 0; + bool have_version_of_aggregate_function = false; + while (true) + { + if (arg_num > 0) + { + if (pos->type == TokenType::Comma) + ++pos; + else + break; + } + + ASTPtr arg; + if (type_name == "Dynamic") + { + DynamicArgumentsParser parser; + parser.parse(pos, arg, expected); + } + else if (type_name == "Nested") + { + ParserNestedTable nested_parser; + nested_parser.parse(pos, arg, expected); + } + else if (type_name == "AggregateFunction" || type_name == "SimpleAggregateFunction") + { + /// This is less trivial. + /// The first optional argument for AggregateFunction is a numeric literal, defining the version. + /// The next argument is the function name, optionally with parameters. + /// Subsequent arguments are data types. 
+ + if (arg_num == 0 && type_name == "AggregateFunction") + { + ParserUnsignedInteger version_parser; + if (version_parser.parse(pos, arg, expected)) + { + have_version_of_aggregate_function = true; + expr_list_args->children.emplace_back(std::move(arg)); + ++arg_num; + continue; + } + } + + if (arg_num == (have_version_of_aggregate_function ? 1 : 0)) + { + ParserFunction function_parser; + ParserIdentifier identifier_parser; + function_parser.parse(pos, arg, expected) + || identifier_parser.parse(pos, arg, expected); + } + else + { + ParserDataType data_type_parser; + data_type_parser.parse(pos, arg, expected); + } + } + else + { + ParserDataType data_type_parser; + ParserAllCollectionsOfLiterals literal_parser(false); + + const char * operators[] = {"=", "equals", nullptr}; + ParserLeftAssociativeBinaryOperatorList enum_parser(operators, std::make_unique()); + + enum_parser.parse(pos, arg, expected) + || literal_parser.parse(pos, arg, expected) + || data_type_parser.parse(pos, arg, expected); + } + + if (!arg) + break; + + expr_list_args->children.emplace_back(std::move(arg)); + ++arg_num; + } - if (!args_parser.parse(pos, expr_list_args, expected)) - return false; if (pos->type == TokenType::Comma) // ignore trailing comma inside Nested structures like Tuple(Int, Tuple(Int, String),) ++pos; From 48e7708d7bcf575123ea20cee9455e0a4cf26791 Mon Sep 17 00:00:00 2001 From: Xu Jia Date: Fri, 19 Jul 2024 10:29:13 +0800 Subject: [PATCH 091/661] fix compile error --- src/Interpreters/InterpreterAlterQuery.cpp | 1 + src/Interpreters/InterpreterDeleteQuery.cpp | 1 + src/Interpreters/InterpreterInsertQuery.cpp | 8 ++++---- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/Interpreters/InterpreterAlterQuery.cpp b/src/Interpreters/InterpreterAlterQuery.cpp index 398fe31f1a9..9b5b5dfc20a 100644 --- a/src/Interpreters/InterpreterAlterQuery.cpp +++ b/src/Interpreters/InterpreterAlterQuery.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Interpreters/InterpreterDeleteQuery.cpp b/src/Interpreters/InterpreterDeleteQuery.cpp index b37ec4de4ab..291c8e19db0 100644 --- a/src/Interpreters/InterpreterDeleteQuery.cpp +++ b/src/Interpreters/InterpreterDeleteQuery.cpp @@ -3,6 +3,7 @@ #include #include +#include #include #include #include diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index 181fb064b54..aef6c1249d5 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -411,10 +412,6 @@ std::pair, std::vector> InterpreterInsertQuery::buildP if (!running_group) running_group = std::make_shared(getContext()); - if (getContext()->getServerSettings().disable_insertion_and_mutation - && query.table_id.database_name != DatabaseCatalog::SYSTEM_DATABASE) - throw Exception(ErrorCodes::QUERY_IS_PROHIBITED, "Insert queries are prohibited"); - std::vector sink_chains; std::vector presink_chains; @@ -737,6 +734,9 @@ BlockIO InterpreterInsertQuery::execute() const Settings & settings = getContext()->getSettingsRef(); auto & query = query_ptr->as(); + if (getContext()->getServerSettings().disable_insertion_and_mutation + && query.table_id.database_name != DatabaseCatalog::SYSTEM_DATABASE) + throw Exception(ErrorCodes::QUERY_IS_PROHIBITED, "Insert queries are prohibited"); StoragePtr table = getTable(query); checkStorageSupportsTransactionsIfNeeded(table, getContext()); From 
4cb862432c50848e3406899f5c7079b4cf1d62a8 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Fri, 19 Jul 2024 09:34:20 +0000 Subject: [PATCH 092/661] Rename method --- src/Processors/IProcessor.h | 2 +- src/Processors/Sources/RemoteSource.cpp | 2 +- src/Processors/Sources/RemoteSource.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Processors/IProcessor.h b/src/Processors/IProcessor.h index 358983a2179..0776921a814 100644 --- a/src/Processors/IProcessor.h +++ b/src/Processors/IProcessor.h @@ -221,7 +221,7 @@ public: throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method 'schedule' is not implemented for {} processor", getName()); } - virtual void asyncJobReady() {} + virtual void onAsyncJobReady() {} /** You must call this method if 'prepare' returned ExpandPipeline. * This method cannot access any port, but it can create new ports for current processor. diff --git a/src/Processors/Sources/RemoteSource.cpp b/src/Processors/Sources/RemoteSource.cpp index 3ec2356a121..587f6e2001b 100644 --- a/src/Processors/Sources/RemoteSource.cpp +++ b/src/Processors/Sources/RemoteSource.cpp @@ -104,7 +104,7 @@ void RemoteSource::work() ISource::work(); } -void RemoteSource::asyncJobReady() +void RemoteSource::onAsyncJobReady() { chassert(async_read); diff --git a/src/Processors/Sources/RemoteSource.h b/src/Processors/Sources/RemoteSource.h index fa04985f101..2247c781584 100644 --- a/src/Processors/Sources/RemoteSource.h +++ b/src/Processors/Sources/RemoteSource.h @@ -32,7 +32,7 @@ public: int schedule() override { return fd; } - void asyncJobReady() override; + void onAsyncJobReady() override; void setStorageLimits(const std::shared_ptr & storage_limits_) override; From 8349d260952a6daeb84c653c37ac000cf5302cfd Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Fri, 19 Jul 2024 11:25:34 +0000 Subject: [PATCH 093/661] Simplified implementation --- src/Processors/Sources/RemoteSource.cpp | 17 +++++------ src/Processors/Sources/RemoteSource.h | 2 +- src/QueryPipeline/RemoteQueryExecutor.cpp | 37 +++++++++++++++++++++++ src/QueryPipeline/RemoteQueryExecutor.h | 3 ++ 4 files changed, 49 insertions(+), 10 deletions(-) diff --git a/src/Processors/Sources/RemoteSource.cpp b/src/Processors/Sources/RemoteSource.cpp index 587f6e2001b..46c27676e12 100644 --- a/src/Processors/Sources/RemoteSource.cpp +++ b/src/Processors/Sources/RemoteSource.cpp @@ -89,9 +89,6 @@ ISource::Status RemoteSource::prepare() void RemoteSource::work() { - if (async_immediate_work.exchange(false)) - return; - /// Connection drain is a heavy operation that may take a long time. /// Therefore we move connection drain from prepare() to work(), and drain multiple connections in parallel. 
/// See issue: https://github.com/ClickHouse/ClickHouse/issues/60844 @@ -101,6 +98,13 @@ void RemoteSource::work() executor_finished = true; return; } + + if (preprocessed_packet) + { + preprocessed_packet = false; + return; + } + ISource::work(); } @@ -111,12 +115,7 @@ void RemoteSource::onAsyncJobReady() if (!was_query_sent) return; - auto res = query_executor->readAsync(/*check_packet_type_only=*/true); - if (res.type == RemoteQueryExecutor::ReadResult::Type::ParallelReplicasToken) - { - work(); - async_immediate_work = true; - } + preprocessed_packet = query_executor->processParallelReplicaPacketIfAny(); } std::optional RemoteSource::tryGenerate() diff --git a/src/Processors/Sources/RemoteSource.h b/src/Processors/Sources/RemoteSource.h index 2247c781584..22d3921708b 100644 --- a/src/Processors/Sources/RemoteSource.h +++ b/src/Processors/Sources/RemoteSource.h @@ -54,7 +54,7 @@ private: int fd = -1; size_t rows = 0; bool manually_add_rows_before_limit_counter = false; - std::atomic_bool async_immediate_work{false}; + bool preprocessed_packet = false; }; /// Totals source from RemoteQueryExecutor. diff --git a/src/QueryPipeline/RemoteQueryExecutor.cpp b/src/QueryPipeline/RemoteQueryExecutor.cpp index d7edbc9ed35..b15e31a120f 100644 --- a/src/QueryPipeline/RemoteQueryExecutor.cpp +++ b/src/QueryPipeline/RemoteQueryExecutor.cpp @@ -936,4 +936,41 @@ bool RemoteQueryExecutor::needToSkipUnavailableShard() const return context->getSettingsRef().skip_unavailable_shards && (0 == connections->size()); } +bool RemoteQueryExecutor::processParallelReplicaPacketIfAny() +{ +#if defined(OS_LINUX) + if (!read_context || (resent_query && recreate_read_context)) + { + std::lock_guard lock(was_cancelled_mutex); + if (was_cancelled) + return false; + + read_context = std::make_unique(*this); + recreate_read_context = false; + } + + { + std::lock_guard lock(was_cancelled_mutex); + if (was_cancelled) + return false; + + chassert(!has_postponed_packet); + + read_context->resume(); + if (read_context->isInProgress()) // <- nothing to process + return false; + + const auto packet_type = read_context->getPacketType(); + if (packet_type == Protocol::Server::MergeTreeReadTaskRequest || packet_type == Protocol::Server::MergeTreeAllRangesAnnouncement) + { + processPacket(read_context->getPacket()); + return true; + } + + has_postponed_packet = true; + return false; + } +#endif +} + } diff --git a/src/QueryPipeline/RemoteQueryExecutor.h b/src/QueryPipeline/RemoteQueryExecutor.h index 6849c3e0a07..6f56df71f1d 100644 --- a/src/QueryPipeline/RemoteQueryExecutor.h +++ b/src/QueryPipeline/RemoteQueryExecutor.h @@ -222,6 +222,9 @@ public: bool isReplicaUnavailable() const { return extension && extension->parallel_reading_coordinator && connections->size() == 0; } + /// return true if parallel replica packet was processed + bool processParallelReplicaPacketIfAny(); + private: RemoteQueryExecutor( const String & query_, From 4e3fdfc2d6482d42b8e152911e24ee38b1bafc89 Mon Sep 17 00:00:00 2001 From: Alexander Gololobov Date: Fri, 19 Jul 2024 13:26:35 +0200 Subject: [PATCH 094/661] Save writer thread id for debugging --- src/Common/SharedMutex.cpp | 10 +++++++++- src/Common/SharedMutex.h | 2 ++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/src/Common/SharedMutex.cpp b/src/Common/SharedMutex.cpp index 1df09ca998a..7b00ef0b28b 100644 --- a/src/Common/SharedMutex.cpp +++ b/src/Common/SharedMutex.cpp @@ -1,4 +1,5 @@ #include +#include #ifdef OS_LINUX /// Because of futex @@ -12,6 +13,7 @@ namespace DB 
SharedMutex::SharedMutex() : state(0) , waiters(0) + , writer_thread_id(0) {} void SharedMutex::lock() @@ -32,16 +34,22 @@ void SharedMutex::lock() value |= writers; while (value & readers) futexWaitLowerFetch(state, value); + + writer_thread_id.store(getThreadId()); } bool SharedMutex::try_lock() { UInt64 value = 0; - return state.compare_exchange_strong(value, writers); + bool success = state.compare_exchange_strong(value, writers); + if (success) + writer_thread_id.store(getThreadId()); + return success; } void SharedMutex::unlock() { + writer_thread_id.store(0); state.store(0); if (waiters) futexWakeUpperAll(state); diff --git a/src/Common/SharedMutex.h b/src/Common/SharedMutex.h index 9215ff62af3..a53e2984239 100644 --- a/src/Common/SharedMutex.h +++ b/src/Common/SharedMutex.h @@ -36,6 +36,8 @@ private: alignas(64) std::atomic state; std::atomic waiters; + /// Is set while the lock is held in exclusive mode only to facilitate debugging + std::atomic writer_thread_id; }; } From 55d1656f4d0da2f23b2df719dabeed7999645349 Mon Sep 17 00:00:00 2001 From: Alexander Gololobov Date: Fri, 19 Jul 2024 13:27:41 +0200 Subject: [PATCH 095/661] Moving is not safe, prohibit it --- src/Common/SharedMutex.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/Common/SharedMutex.h b/src/Common/SharedMutex.h index a53e2984239..c77c8765885 100644 --- a/src/Common/SharedMutex.h +++ b/src/Common/SharedMutex.h @@ -19,6 +19,8 @@ public: ~SharedMutex() = default; SharedMutex(const SharedMutex &) = delete; SharedMutex & operator=(const SharedMutex &) = delete; + SharedMutex(SharedMutex &&) = delete; + SharedMutex & operator=(SharedMutex &&) = delete; // Exclusive ownership void lock() TSA_ACQUIRE(); From 53ea5510143ded0862fd51922077a7cdc1344fe2 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Fri, 19 Jul 2024 11:30:55 +0000 Subject: [PATCH 096/661] Remove unused code --- src/QueryPipeline/RemoteQueryExecutor.cpp | 14 +------------- src/QueryPipeline/RemoteQueryExecutor.h | 2 +- 2 files changed, 2 insertions(+), 14 deletions(-) diff --git a/src/QueryPipeline/RemoteQueryExecutor.cpp b/src/QueryPipeline/RemoteQueryExecutor.cpp index b15e31a120f..b78c38a4134 100644 --- a/src/QueryPipeline/RemoteQueryExecutor.cpp +++ b/src/QueryPipeline/RemoteQueryExecutor.cpp @@ -469,7 +469,7 @@ RemoteQueryExecutor::ReadResult RemoteQueryExecutor::read() return restartQueryWithoutDuplicatedUUIDs(); } -RemoteQueryExecutor::ReadResult RemoteQueryExecutor::readAsync([[maybe_unused]] bool check_packet_type_only) +RemoteQueryExecutor::ReadResult RemoteQueryExecutor::readAsync() { #if defined(OS_LINUX) if (!read_context || (resent_query && recreate_read_context)) @@ -519,18 +519,6 @@ RemoteQueryExecutor::ReadResult RemoteQueryExecutor::readAsync([[maybe_unused]] if (read_context->isInProgress()) return ReadResult(read_context->getFileDescriptor()); - if (check_packet_type_only) - { - has_postponed_packet = true; - const auto packet_type = read_context->getPacketType(); - if (packet_type == Protocol::Server::MergeTreeReadTaskRequest - || packet_type == Protocol::Server::MergeTreeAllRangesAnnouncement) - { - return ReadResult(ReadResult::Type::ParallelReplicasToken); - } - return ReadResult(ReadResult::Type::Nothing); - } - auto read_result = processPacket(read_context->getPacket()); if (read_result.getType() == ReadResult::Type::Data || read_result.getType() == ReadResult::Type::ParallelReplicasToken) return read_result; diff --git a/src/QueryPipeline/RemoteQueryExecutor.h b/src/QueryPipeline/RemoteQueryExecutor.h index 
6f56df71f1d..7289e2a2243 100644 --- a/src/QueryPipeline/RemoteQueryExecutor.h +++ b/src/QueryPipeline/RemoteQueryExecutor.h @@ -183,7 +183,7 @@ public: ReadResult read(); /// Async variant of read. Returns ready block or file descriptor which may be used for polling. - ReadResult readAsync(bool check_packet_type_only = false); + ReadResult readAsync(); /// Receive all remain packets and finish query. /// It should be cancelled after read returned empty block. From 3f66b39a18a7bf271a9a9f97dfc075866e2409eb Mon Sep 17 00:00:00 2001 From: Michael Stetsyuk Date: Fri, 19 Jul 2024 14:30:27 +0200 Subject: [PATCH 097/661] test replication lag metric --- .../0_stateless/03206_replication_lag_metric.reference | 4 ++++ .../queries/0_stateless/03206_replication_lag_metric.sql | 9 +++++++++ 2 files changed, 13 insertions(+) create mode 100644 tests/queries/0_stateless/03206_replication_lag_metric.reference create mode 100644 tests/queries/0_stateless/03206_replication_lag_metric.sql diff --git a/tests/queries/0_stateless/03206_replication_lag_metric.reference b/tests/queries/0_stateless/03206_replication_lag_metric.reference new file mode 100644 index 00000000000..02f4a7264b1 --- /dev/null +++ b/tests/queries/0_stateless/03206_replication_lag_metric.reference @@ -0,0 +1,4 @@ +0 +2 +0 +2 diff --git a/tests/queries/0_stateless/03206_replication_lag_metric.sql b/tests/queries/0_stateless/03206_replication_lag_metric.sql new file mode 100644 index 00000000000..6b86553fcaf --- /dev/null +++ b/tests/queries/0_stateless/03206_replication_lag_metric.sql @@ -0,0 +1,9 @@ +CREATE DATABASE rdb1 ENGINE = Replicated('/test/test_replication_lag_metric', 'shard1', 'replica1'); +CREATE DATABASE rdb2 ENGINE = Replicated('/test/test_replication_lag_metric', 'shard1', 'replica2'); + +SET distributed_ddl_task_timeout = 0; +CREATE TABLE rdb1.t (id UInt32) ENGINE = ReplicatedMergeTree ORDER BY id; +SELECT replication_lag FROM system.clusters; + +DROP DATABASE rdb1; +DROP DATABASE rdb2; From 245626e5789064fda39ccc7288b83162284a3617 Mon Sep 17 00:00:00 2001 From: Michael Stetsyuk Date: Fri, 19 Jul 2024 14:30:44 +0200 Subject: [PATCH 098/661] small fix --- src/Storages/System/StorageSystemClusters.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/System/StorageSystemClusters.h b/src/Storages/System/StorageSystemClusters.h index ead123aa79e..f6e08734896 100644 --- a/src/Storages/System/StorageSystemClusters.h +++ b/src/Storages/System/StorageSystemClusters.h @@ -1,10 +1,10 @@ #pragma once +#include #include #include #include - namespace DB { From 79ef630d85cb445a743ee2d5950197709d75325f Mon Sep 17 00:00:00 2001 From: Michael Stetsyuk Date: Fri, 19 Jul 2024 15:25:08 +0200 Subject: [PATCH 099/661] fix tests --- .../0_stateless/02117_show_create_table_system.reference | 2 ++ tests/queries/0_stateless/03206_replication_lag_metric.sql | 4 +++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02117_show_create_table_system.reference b/tests/queries/0_stateless/02117_show_create_table_system.reference index 8f62eda9233..28356632a66 100644 --- a/tests/queries/0_stateless/02117_show_create_table_system.reference +++ b/tests/queries/0_stateless/02117_show_create_table_system.reference @@ -52,6 +52,8 @@ CREATE TABLE system.clusters `database_shard_name` String, `database_replica_name` String, `is_active` Nullable(UInt8), + `replication_lag` Nullable(UInt32), + `recovery_time` Nullable(UInt64), `name` String ALIAS cluster ) ENGINE = SystemClusters diff --git 
a/tests/queries/0_stateless/03206_replication_lag_metric.sql b/tests/queries/0_stateless/03206_replication_lag_metric.sql index 6b86553fcaf..998c332a11c 100644 --- a/tests/queries/0_stateless/03206_replication_lag_metric.sql +++ b/tests/queries/0_stateless/03206_replication_lag_metric.sql @@ -1,9 +1,11 @@ +-- Tags: no-parallel + CREATE DATABASE rdb1 ENGINE = Replicated('/test/test_replication_lag_metric', 'shard1', 'replica1'); CREATE DATABASE rdb2 ENGINE = Replicated('/test/test_replication_lag_metric', 'shard1', 'replica2'); SET distributed_ddl_task_timeout = 0; CREATE TABLE rdb1.t (id UInt32) ENGINE = ReplicatedMergeTree ORDER BY id; -SELECT replication_lag FROM system.clusters; +SELECT replication_lag FROM system.clusters WHERE cluster IN ('rdb1', 'rdb2') ORDER BY cluster ASC, replica_num ASC; DROP DATABASE rdb1; DROP DATABASE rdb2; From f3fb729f53860d55db1d72ccfc88f9c5d018aea1 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Fri, 19 Jul 2024 20:12:14 +0000 Subject: [PATCH 100/661] Call onAsyncJobReady() --- src/Processors/Executors/ExecutorTasks.cpp | 2 ++ src/Processors/Executors/ExecutorTasks.h | 2 +- src/Processors/Executors/PipelineExecutor.h | 1 - 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/Processors/Executors/ExecutorTasks.cpp b/src/Processors/Executors/ExecutorTasks.cpp index 7e3bee239ef..d045f59a2e2 100644 --- a/src/Processors/Executors/ExecutorTasks.cpp +++ b/src/Processors/Executors/ExecutorTasks.cpp @@ -204,6 +204,8 @@ void ExecutorTasks::processAsyncTasks() while (auto task = async_task_queue.wait(lock)) { auto * node = static_cast(task.data); + node->processor->onAsyncJobReady(); + executor_contexts[task.thread_num]->pushAsyncTask(node); ++num_waiting_async_tasks; diff --git a/src/Processors/Executors/ExecutorTasks.h b/src/Processors/Executors/ExecutorTasks.h index 202ca253c6c..b2201873edf 100644 --- a/src/Processors/Executors/ExecutorTasks.h +++ b/src/Processors/Executors/ExecutorTasks.h @@ -28,7 +28,7 @@ class ExecutorTasks TaskQueue task_queue; /// Queue which stores tasks where processors returned Async status after prepare. - /// If multiple threads are using, main thread will wait for async tasks. + /// If multiple threads are used, main thread will wait for async tasks. /// For single thread, will wait for async tasks only when task_queue is empty. 
PollingQueue async_task_queue; diff --git a/src/Processors/Executors/PipelineExecutor.h b/src/Processors/Executors/PipelineExecutor.h index 03f0f7f1a0a..ae119355cb5 100644 --- a/src/Processors/Executors/PipelineExecutor.h +++ b/src/Processors/Executors/PipelineExecutor.h @@ -9,7 +9,6 @@ #include #include -#include #include From 465a34d3dfe3e313471e10d59cab8219b3e5837e Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Fri, 19 Jul 2024 20:27:57 +0000 Subject: [PATCH 101/661] Simplify, fix build --- src/QueryPipeline/RemoteQueryExecutor.cpp | 48 +++++++++++------------ 1 file changed, 22 insertions(+), 26 deletions(-) diff --git a/src/QueryPipeline/RemoteQueryExecutor.cpp b/src/QueryPipeline/RemoteQueryExecutor.cpp index b78c38a4134..61a512bcfc5 100644 --- a/src/QueryPipeline/RemoteQueryExecutor.cpp +++ b/src/QueryPipeline/RemoteQueryExecutor.cpp @@ -924,41 +924,37 @@ bool RemoteQueryExecutor::needToSkipUnavailableShard() const return context->getSettingsRef().skip_unavailable_shards && (0 == connections->size()); } -bool RemoteQueryExecutor::processParallelReplicaPacketIfAny() +bool RemoteQueryExecutor::processParallelReplicaPacketIfAny() { #if defined(OS_LINUX) + + std::lock_guard lock(was_cancelled_mutex); + if (was_cancelled) + return false; + if (!read_context || (resent_query && recreate_read_context)) { - std::lock_guard lock(was_cancelled_mutex); - if (was_cancelled) - return false; - read_context = std::make_unique(*this); recreate_read_context = false; } - { - std::lock_guard lock(was_cancelled_mutex); - if (was_cancelled) - return false; + chassert(!has_postponed_packet); - chassert(!has_postponed_packet); - - read_context->resume(); - if (read_context->isInProgress()) // <- nothing to process - return false; - - const auto packet_type = read_context->getPacketType(); - if (packet_type == Protocol::Server::MergeTreeReadTaskRequest || packet_type == Protocol::Server::MergeTreeAllRangesAnnouncement) - { - processPacket(read_context->getPacket()); - return true; - } - - has_postponed_packet = true; + read_context->resume(); + if (read_context->isInProgress()) // <- nothing to process return false; - } -#endif -} + const auto packet_type = read_context->getPacketType(); + if (packet_type == Protocol::Server::MergeTreeReadTaskRequest || packet_type == Protocol::Server::MergeTreeAllRangesAnnouncement) + { + processPacket(read_context->getPacket()); + return true; + } + + has_postponed_packet = true; + +#endif + + return false; +} } From a373b62bbf8083ffa96210fc1c959f13939526fc Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 20 Jul 2024 00:04:52 +0200 Subject: [PATCH 102/661] Better diagnostics in functional tests --- docker/test/stateless/run.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh index 0647ed02839..f43bb5da33d 100755 --- a/docker/test/stateless/run.sh +++ b/docker/test/stateless/run.sh @@ -264,7 +264,7 @@ if [ "$NUM_TRIES" -gt "1" ]; then # We don't run tests with Ordinary database in PRs, only in master. # So run new/changed tests with Ordinary at least once in flaky check. 
timeout_with_logging "$TIMEOUT" bash -c 'NUM_TRIES=1; USE_DATABASE_ORDINARY=1; run_tests' \ - | sed 's/All tests have finished//' | sed 's/No tests were run//' ||: + | sed 's/All tests have finished/Redacted: a message about tests finish is deleted/' | sed 's/No tests were run/Redacted: a message about no tests run is deleted/' ||: fi timeout_with_logging "$TIMEOUT" bash -c run_tests ||: From 134c0065407bd3f9394a720fbdfef7edf241ef84 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 20 Jul 2024 00:08:25 +0200 Subject: [PATCH 103/661] Whitespace --- docker/test/stateless/utils.lib | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/test/stateless/utils.lib b/docker/test/stateless/utils.lib index c3bb8ae9ea4..6b7b659296b 100644 --- a/docker/test/stateless/utils.lib +++ b/docker/test/stateless/utils.lib @@ -47,7 +47,7 @@ function timeout_with_logging() { if [[ "${exit_code}" -eq "124" ]] then - echo "The command 'timeout ${*}' has been killed by timeout" + echo "The command 'timeout ${*}' has been killed by timeout" fi return $exit_code From ba6b7b86ba3e868cd001efbce2c6cf8a5236a024 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 20 Jul 2024 00:09:53 +0200 Subject: [PATCH 104/661] Log messages --- docker/test/stateless/utils.lib | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/docker/test/stateless/utils.lib b/docker/test/stateless/utils.lib index 6b7b659296b..cb257536c36 100644 --- a/docker/test/stateless/utils.lib +++ b/docker/test/stateless/utils.lib @@ -45,9 +45,12 @@ function timeout_with_logging() { timeout -s TERM --preserve-status "${@}" || exit_code="${?}" + echo "Checking if it is a timeout. The code 124 will indicate a timeout." if [[ "${exit_code}" -eq "124" ]] then - echo "The command 'timeout ${*}' has been killed by timeout" + echo "The command 'timeout ${*}' has been killed by timeout." + else + echo "No, it isn't a timeout." fi return $exit_code From 5ae3a421e0bf90c3d1755371fe1d6ff5662207ca Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 20 Jul 2024 00:12:02 +0200 Subject: [PATCH 105/661] Copy-paste --- docker/test/fasttest/run.sh | 5 ++++- docker/test/stateful/run.sh | 5 ++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh index 970bf12a81a..26283afc86a 100755 --- a/docker/test/fasttest/run.sh +++ b/docker/test/fasttest/run.sh @@ -261,9 +261,12 @@ function timeout_with_logging() { timeout -s TERM --preserve-status "${@}" || exit_code="${?}" + echo "Checking if it is a timeout. The code 124 will indicate a timeout." if [[ "${exit_code}" -eq "124" ]] then - echo "The command 'timeout ${*}' has been killed by timeout" + echo "The command 'timeout ${*}' has been killed by timeout." + else + echo "No, it isn't a timeout." fi return $exit_code diff --git a/docker/test/stateful/run.sh b/docker/test/stateful/run.sh index 857385f4715..3a0e3a8be48 100755 --- a/docker/test/stateful/run.sh +++ b/docker/test/stateful/run.sh @@ -251,9 +251,12 @@ function timeout_with_logging() { timeout -s TERM --preserve-status "${@}" || exit_code="${?}" + echo "Checking if it is a timeout. The code 124 will indicate a timeout." if [[ "${exit_code}" -eq "124" ]] then - echo "The command 'timeout ${*}' has been killed by timeout" + echo "The command 'timeout ${*}' has been killed by timeout." + else + echo "No, it isn't a timeout." 
fi return $exit_code From 444303cb7117c92e578ad4ea20f7c0001edb3c8b Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 20 Jul 2024 00:25:57 +0200 Subject: [PATCH 106/661] Better diagnostics --- docker/test/stateless/run.sh | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh index f43bb5da33d..b24af431ff1 100755 --- a/docker/test/stateless/run.sh +++ b/docker/test/stateless/run.sh @@ -247,12 +247,22 @@ function run_tests() try_run_with_retry 10 clickhouse-client -q "insert into system.zookeeper (name, path, value) values ('auxiliary_zookeeper2', '/test/chroot/', '')" + TIMEOUT=$((MAX_RUN_TIME - 800 > 8400 ? 8400 : MAX_RUN_TIME - 800)) + START_TIME=${SECONDS} set +e - timeout -k 60m -s TERM --preserve-status 140m clickhouse-test --testname --shard --zookeeper --check-zookeeper-session --hung-check --print-time \ - --no-drop-if-fail --test-runs "$NUM_TRIES" "${ADDITIONAL_OPTIONS[@]}" 2>&1 \ + timeout --preserve-status --signal TERM --kill-after 60m ${TIMEOUT}s \ + clickhouse-test --testname --shard --zookeeper --check-zookeeper-session --hung-check --print-time \ + --no-drop-if-fail --test-runs "$NUM_TRIES" "${ADDITIONAL_OPTIONS[@]}" 2>&1 \ | ts '%Y-%m-%d %H:%M:%S' \ | tee -a test_output/test_result.txt set -e + DURATION=$((START_TIME - SECONDS)) + + echo "Elapsed ${DURATION} seconds." + if [[ $DURATION -ge $TIMEOUT ]] + then + echo "It looks like the command is terminated by the timeout, which is ${TIMEOUT} seconds." + fi } export -f run_tests From 8786d9b5dd5f93d5c1f22c4d618093d69f8d57a3 Mon Sep 17 00:00:00 2001 From: joelynch Date: Sun, 21 Jul 2024 20:00:38 +0200 Subject: [PATCH 107/661] Ensure COMMENT clause works for all table engines --- src/Databases/SQLite/DatabaseSQLite.cpp | 1 + src/Storages/Kafka/StorageKafka.cpp | 10 +++++++--- src/Storages/Kafka/StorageKafka.h | 1 + src/Storages/NATS/StorageNATS.cpp | 4 +++- src/Storages/NATS/StorageNATS.h | 1 + .../PostgreSQL/StorageMaterializedPostgreSQL.cpp | 1 + src/Storages/RabbitMQ/StorageRabbitMQ.cpp | 4 +++- src/Storages/RabbitMQ/StorageRabbitMQ.h | 1 + src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp | 1 + src/Storages/StorageExecutable.cpp | 7 ++++--- src/Storages/StorageExecutable.h | 3 ++- src/Storages/StorageKeeperMap.cpp | 1 + src/Storages/StorageSQLite.cpp | 4 +++- src/Storages/StorageSQLite.h | 1 + src/Storages/WindowView/StorageWindowView.cpp | 4 +++- src/Storages/WindowView/StorageWindowView.h | 1 + src/TableFunctions/TableFunctionExecutable.cpp | 9 ++++++++- src/TableFunctions/TableFunctionSQLite.cpp | 2 +- 18 files changed, 43 insertions(+), 13 deletions(-) diff --git a/src/Databases/SQLite/DatabaseSQLite.cpp b/src/Databases/SQLite/DatabaseSQLite.cpp index 132a978140c..471730fce29 100644 --- a/src/Databases/SQLite/DatabaseSQLite.cpp +++ b/src/Databases/SQLite/DatabaseSQLite.cpp @@ -154,6 +154,7 @@ StoragePtr DatabaseSQLite::fetchTable(const String & table_name, ContextPtr loca table_name, ColumnsDescription{*columns}, ConstraintsDescription{}, + /* comment = */ "", local_context); return storage; diff --git a/src/Storages/Kafka/StorageKafka.cpp b/src/Storages/Kafka/StorageKafka.cpp index 809401bb279..3aad64a0cfb 100644 --- a/src/Storages/Kafka/StorageKafka.cpp +++ b/src/Storages/Kafka/StorageKafka.cpp @@ -418,8 +418,11 @@ namespace } StorageKafka::StorageKafka( - const StorageID & table_id_, ContextPtr context_, - const ColumnsDescription & columns_, std::unique_ptr kafka_settings_, + const StorageID & table_id_, + ContextPtr 
context_, + const ColumnsDescription & columns_, + const String & comment, + std::unique_ptr kafka_settings_, const String & collection_name_) : IStorage(table_id_) , WithContext(context_->getGlobalContext()) @@ -451,6 +454,7 @@ StorageKafka::StorageKafka( StorageInMemoryMetadata storage_metadata; storage_metadata.setColumns(columns_); + storage_metadata.setComment(comment); setInMemoryMetadata(storage_metadata); setVirtuals(createVirtuals(kafka_settings->kafka_handle_error_mode)); @@ -1317,7 +1321,7 @@ void registerStorageKafka(StorageFactory & factory) "See https://clickhouse.com/docs/en/engines/table-engines/integrations/kafka/#configuration"); } - return std::make_shared(args.table_id, args.getContext(), args.columns, std::move(kafka_settings), collection_name); + return std::make_shared(args.table_id, args.getContext(), args.columns, args.comment, std::move(kafka_settings), collection_name); }; factory.registerStorage("Kafka", creator_fn, StorageFactory::StorageFeatures{ .supports_settings = true, }); diff --git a/src/Storages/Kafka/StorageKafka.h b/src/Storages/Kafka/StorageKafka.h index fa4affbda36..31e1a6076b6 100644 --- a/src/Storages/Kafka/StorageKafka.h +++ b/src/Storages/Kafka/StorageKafka.h @@ -40,6 +40,7 @@ public: const StorageID & table_id_, ContextPtr context_, const ColumnsDescription & columns_, + const String & comment, std::unique_ptr kafka_settings_, const String & collection_name_); diff --git a/src/Storages/NATS/StorageNATS.cpp b/src/Storages/NATS/StorageNATS.cpp index 8f0e2d76473..9d728c3395f 100644 --- a/src/Storages/NATS/StorageNATS.cpp +++ b/src/Storages/NATS/StorageNATS.cpp @@ -49,6 +49,7 @@ StorageNATS::StorageNATS( const StorageID & table_id_, ContextPtr context_, const ColumnsDescription & columns_, + const String & comment, std::unique_ptr nats_settings_, LoadingStrictnessLevel mode) : IStorage(table_id_) @@ -87,6 +88,7 @@ StorageNATS::StorageNATS( StorageInMemoryMetadata storage_metadata; storage_metadata.setColumns(columns_); + storage_metadata.setComment(comment); setInMemoryMetadata(storage_metadata); setVirtuals(createVirtuals(nats_settings->nats_handle_error_mode)); @@ -760,7 +762,7 @@ void registerStorageNATS(StorageFactory & factory) if (!nats_settings->nats_subjects.changed) throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "You must specify `nats_subjects` setting"); - return std::make_shared(args.table_id, args.getContext(), args.columns, std::move(nats_settings), args.mode); + return std::make_shared(args.table_id, args.getContext(), args.columns, args.comment, std::move(nats_settings), args.mode); }; factory.registerStorage("NATS", creator_fn, StorageFactory::StorageFeatures{ .supports_settings = true, }); diff --git a/src/Storages/NATS/StorageNATS.h b/src/Storages/NATS/StorageNATS.h index 41d77acfde6..5fca8cb0163 100644 --- a/src/Storages/NATS/StorageNATS.h +++ b/src/Storages/NATS/StorageNATS.h @@ -23,6 +23,7 @@ public: const StorageID & table_id_, ContextPtr context_, const ColumnsDescription & columns_, + const String & comment, std::unique_ptr nats_settings_, LoadingStrictnessLevel mode); diff --git a/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp b/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp index a904b29e12f..f4c38a52a3f 100644 --- a/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp +++ b/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp @@ -571,6 +571,7 @@ void registerStorageMaterializedPostgreSQL(StorageFactory & factory) StorageInMemoryMetadata metadata; 
metadata.setColumns(args.columns); metadata.setConstraints(args.constraints); + metadata.setComment(args.comment); if (args.mode <= LoadingStrictnessLevel::CREATE && !args.getLocalContext()->getSettingsRef().allow_experimental_materialized_postgresql_table) diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp index f3d2aff68c8..9e3c40071b5 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp @@ -70,6 +70,7 @@ StorageRabbitMQ::StorageRabbitMQ( const StorageID & table_id_, ContextPtr context_, const ColumnsDescription & columns_, + const String & comment, std::unique_ptr rabbitmq_settings_, LoadingStrictnessLevel mode) : IStorage(table_id_) @@ -145,6 +146,7 @@ StorageRabbitMQ::StorageRabbitMQ( StorageInMemoryMetadata storage_metadata; storage_metadata.setColumns(columns_); + storage_metadata.setComment(comment); setInMemoryMetadata(storage_metadata); setVirtuals(createVirtuals(rabbitmq_settings->rabbitmq_handle_error_mode)); @@ -1288,7 +1290,7 @@ void registerStorageRabbitMQ(StorageFactory & factory) if (!rabbitmq_settings->rabbitmq_format.changed) throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "You must specify `rabbitmq_format` setting"); - return std::make_shared(args.table_id, args.getContext(), args.columns, std::move(rabbitmq_settings), args.mode); + return std::make_shared(args.table_id, args.getContext(), args.columns, args.comment, std::move(rabbitmq_settings), args.mode); }; factory.registerStorage("RabbitMQ", creator_fn, StorageFactory::StorageFeatures{ .supports_settings = true, }); diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.h b/src/Storages/RabbitMQ/StorageRabbitMQ.h index b8fab5825e4..fed80a4357b 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.h +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.h @@ -26,6 +26,7 @@ public: const StorageID & table_id_, ContextPtr context_, const ColumnsDescription & columns_, + const String & comment, std::unique_ptr rabbitmq_settings_, LoadingStrictnessLevel mode); diff --git a/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp b/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp index 409703c84c6..fafc72da04e 100644 --- a/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp +++ b/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp @@ -691,6 +691,7 @@ static StoragePtr create(const StorageFactory::Arguments & args) StorageInMemoryMetadata metadata; metadata.setColumns(args.columns); metadata.setConstraints(args.constraints); + metadata.setComment(args.comment); if (!args.storage_def->primary_key) throw Exception(ErrorCodes::BAD_ARGUMENTS, "StorageEmbeddedRocksDB must require one column in primary key"); diff --git a/src/Storages/StorageExecutable.cpp b/src/Storages/StorageExecutable.cpp index 381c20c616d..0094723e3fd 100644 --- a/src/Storages/StorageExecutable.cpp +++ b/src/Storages/StorageExecutable.cpp @@ -77,7 +77,8 @@ StorageExecutable::StorageExecutable( const ExecutableSettings & settings_, const std::vector & input_queries_, const ColumnsDescription & columns, - const ConstraintsDescription & constraints) + const ConstraintsDescription & constraints, + const String & comment) : IStorage(table_id_) , settings(settings_) , input_queries(input_queries_) @@ -86,6 +87,7 @@ StorageExecutable::StorageExecutable( StorageInMemoryMetadata storage_metadata; storage_metadata.setColumns(columns); storage_metadata.setConstraints(constraints); + storage_metadata.setComment(comment); setInMemoryMetadata(storage_metadata); 
ShellCommandSourceCoordinator::Configuration configuration @@ -237,7 +239,7 @@ void registerStorageExecutable(StorageFactory & factory) settings.loadFromQuery(*args.storage_def); auto global_context = args.getContext()->getGlobalContext(); - return std::make_shared(args.table_id, format, settings, input_queries, columns, constraints); + return std::make_shared(args.table_id, format, settings, input_queries, columns, constraints, args.comment); }; StorageFactory::StorageFeatures storage_features; @@ -255,4 +257,3 @@ void registerStorageExecutable(StorageFactory & factory) } } - diff --git a/src/Storages/StorageExecutable.h b/src/Storages/StorageExecutable.h index 2be2a84ab49..6748bb3223e 100644 --- a/src/Storages/StorageExecutable.h +++ b/src/Storages/StorageExecutable.h @@ -22,7 +22,8 @@ public: const ExecutableSettings & settings, const std::vector & input_queries, const ColumnsDescription & columns, - const ConstraintsDescription & constraints); + const ConstraintsDescription & constraints, + const String & comment); String getName() const override { diff --git a/src/Storages/StorageKeeperMap.cpp b/src/Storages/StorageKeeperMap.cpp index 587cb621362..16caf01955e 100644 --- a/src/Storages/StorageKeeperMap.cpp +++ b/src/Storages/StorageKeeperMap.cpp @@ -1280,6 +1280,7 @@ StoragePtr create(const StorageFactory::Arguments & args) StorageInMemoryMetadata metadata; metadata.setColumns(args.columns); metadata.setConstraints(args.constraints); + metadata.setComment(args.comment); if (!args.storage_def->primary_key) throw Exception(ErrorCodes::BAD_ARGUMENTS, "StorageKeeperMap requires one column in primary key"); diff --git a/src/Storages/StorageSQLite.cpp b/src/Storages/StorageSQLite.cpp index 85417a2f2a4..b90b15f3b99 100644 --- a/src/Storages/StorageSQLite.cpp +++ b/src/Storages/StorageSQLite.cpp @@ -50,6 +50,7 @@ StorageSQLite::StorageSQLite( const String & remote_table_name_, const ColumnsDescription & columns_, const ConstraintsDescription & constraints_, + const String & comment, ContextPtr context_) : IStorage(table_id_) , WithContext(context_->getGlobalContext()) @@ -71,6 +72,7 @@ StorageSQLite::StorageSQLite( storage_metadata.setConstraints(constraints_); setInMemoryMetadata(storage_metadata); + storage_metadata.setComment(comment); } @@ -211,7 +213,7 @@ void registerStorageSQLite(StorageFactory & factory) auto sqlite_db = openSQLiteDB(database_path, args.getContext(), /* throw_on_error */ args.mode <= LoadingStrictnessLevel::CREATE); return std::make_shared(args.table_id, sqlite_db, database_path, - table_name, args.columns, args.constraints, args.getContext()); + table_name, args.columns, args.constraints, args.comment, args.getContext()); }, { .supports_schema_inference = true, diff --git a/src/Storages/StorageSQLite.h b/src/Storages/StorageSQLite.h index ed673123fe0..97638ac04cb 100644 --- a/src/Storages/StorageSQLite.h +++ b/src/Storages/StorageSQLite.h @@ -27,6 +27,7 @@ public: const String & remote_table_name_, const ColumnsDescription & columns_, const ConstraintsDescription & constraints_, + const String & comment, ContextPtr context_); std::string getName() const override { return "SQLite"; } diff --git a/src/Storages/WindowView/StorageWindowView.cpp b/src/Storages/WindowView/StorageWindowView.cpp index e15da0074d5..7e1bca7d0d6 100644 --- a/src/Storages/WindowView/StorageWindowView.cpp +++ b/src/Storages/WindowView/StorageWindowView.cpp @@ -1188,6 +1188,7 @@ StorageWindowView::StorageWindowView( ContextPtr context_, const ASTCreateQuery & query, const ColumnsDescription & 
columns_, + const String & comment, LoadingStrictnessLevel mode) : IStorage(table_id_) , WithContext(context_->getGlobalContext()) @@ -1206,6 +1207,7 @@ StorageWindowView::StorageWindowView( StorageInMemoryMetadata storage_metadata; storage_metadata.setColumns(columns_); + storage_metadata.setComment(comment); setInMemoryMetadata(storage_metadata); /// If the target table is not set, use inner target table @@ -1761,7 +1763,7 @@ void registerStorageWindowView(StorageFactory & factory) "Experimental WINDOW VIEW feature " "is not enabled (the setting 'allow_experimental_window_view')"); - return std::make_shared(args.table_id, args.getLocalContext(), args.query, args.columns, args.mode); + return std::make_shared(args.table_id, args.getLocalContext(), args.query, args.columns, args.comment, args.mode); }); } diff --git a/src/Storages/WindowView/StorageWindowView.h b/src/Storages/WindowView/StorageWindowView.h index 14ac65091d3..38fca512ed9 100644 --- a/src/Storages/WindowView/StorageWindowView.h +++ b/src/Storages/WindowView/StorageWindowView.h @@ -111,6 +111,7 @@ public: ContextPtr context_, const ASTCreateQuery & query, const ColumnsDescription & columns_, + const String & comment, LoadingStrictnessLevel mode); String getName() const override { return "WindowView"; } diff --git a/src/TableFunctions/TableFunctionExecutable.cpp b/src/TableFunctions/TableFunctionExecutable.cpp index 2c3802e8667..cccd3587bc7 100644 --- a/src/TableFunctions/TableFunctionExecutable.cpp +++ b/src/TableFunctions/TableFunctionExecutable.cpp @@ -170,7 +170,14 @@ StoragePtr TableFunctionExecutable::executeImpl(const ASTPtr & /*ast_function*/, if (settings_query != nullptr) settings.applyChanges(settings_query->as()->changes); - auto storage = std::make_shared(storage_id, format, settings, input_queries, getActualTableStructure(context, is_insert_query), ConstraintsDescription{}); + auto storage = std::make_shared( + storage_id, + format, + settings, + input_queries, + getActualTableStructure(context, is_insert_query), + ConstraintsDescription{}, + /* comment = */ ""); storage->startup(); return storage; } diff --git a/src/TableFunctions/TableFunctionSQLite.cpp b/src/TableFunctions/TableFunctionSQLite.cpp index e367e05bf73..87353025d1d 100644 --- a/src/TableFunctions/TableFunctionSQLite.cpp +++ b/src/TableFunctions/TableFunctionSQLite.cpp @@ -57,7 +57,7 @@ StoragePtr TableFunctionSQLite::executeImpl(const ASTPtr & /*ast_function*/, sqlite_db, database_path, remote_table_name, - cached_columns, ConstraintsDescription{}, context); + cached_columns, ConstraintsDescription{}, /* comment = */ "", context); storage->startup(); return storage; From db549c93a18f49540676ae53bc04e75b85705ddb Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 22 Jul 2024 07:34:34 +0200 Subject: [PATCH 108/661] Fix error --- src/IO/ReadWriteBufferFromHTTP.cpp | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/src/IO/ReadWriteBufferFromHTTP.cpp b/src/IO/ReadWriteBufferFromHTTP.cpp index 4d27a78c8dc..cea1a272401 100644 --- a/src/IO/ReadWriteBufferFromHTTP.cpp +++ b/src/IO/ReadWriteBufferFromHTTP.cpp @@ -123,7 +123,16 @@ void ReadWriteBufferFromHTTP::prepareRequest(Poco::Net::HTTPRequest & request, s std::optional ReadWriteBufferFromHTTP::tryGetFileSize() { if (!file_info) - file_info = getFileInfo(); + { + try + { + file_info = getFileInfo(); + } + catch (const HTTPException & e) + { + return std::nullopt; + } + } return file_info->file_size; } @@ -679,7 +688,7 @@ std::optional 
ReadWriteBufferFromHTTP::tryGetLastModificationTime() { file_info = getFileInfo(); } - catch (...) + catch (const HTTPException & e) { return std::nullopt; } @@ -700,7 +709,7 @@ ReadWriteBufferFromHTTP::HTTPFileInfo ReadWriteBufferFromHTTP::getFileInfo() { getHeadResponse(response); } - catch (HTTPException & e) + catch (const HTTPException & e) { /// Maybe the web server doesn't support HEAD requests. /// E.g. webhdfs reports status 400. From 32f624eebaa560f4c9d6bf9145931270098e8db1 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 22 Jul 2024 07:35:10 +0200 Subject: [PATCH 109/661] Fix error --- src/IO/ReadWriteBufferFromHTTP.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/IO/ReadWriteBufferFromHTTP.cpp b/src/IO/ReadWriteBufferFromHTTP.cpp index cea1a272401..961e8dd6425 100644 --- a/src/IO/ReadWriteBufferFromHTTP.cpp +++ b/src/IO/ReadWriteBufferFromHTTP.cpp @@ -128,7 +128,7 @@ std::optional ReadWriteBufferFromHTTP::tryGetFileSize() { file_info = getFileInfo(); } - catch (const HTTPException & e) + catch (const HTTPException &) { return std::nullopt; } From dc601dc7455895574143f5baf345731d437bf8d3 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 22 Jul 2024 07:37:15 +0200 Subject: [PATCH 110/661] Fix error --- src/IO/ReadWriteBufferFromHTTP.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/IO/ReadWriteBufferFromHTTP.cpp b/src/IO/ReadWriteBufferFromHTTP.cpp index 961e8dd6425..85230957b3f 100644 --- a/src/IO/ReadWriteBufferFromHTTP.cpp +++ b/src/IO/ReadWriteBufferFromHTTP.cpp @@ -688,7 +688,7 @@ std::optional ReadWriteBufferFromHTTP::tryGetLastModificationTime() { file_info = getFileInfo(); } - catch (const HTTPException & e) + catch (const HTTPException &) { return std::nullopt; } From a564f70b66367ee4363d46e45eb5a9c66f131fec Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 22 Jul 2024 08:09:39 +0200 Subject: [PATCH 111/661] Fix error --- src/Parsers/ParserAlterQuery.cpp | 2 -- src/Parsers/ParserCreateIndexQuery.cpp | 4 ++-- src/Parsers/ParserCreateQuery.cpp | 4 ++-- 3 files changed, 4 insertions(+), 6 deletions(-) diff --git a/src/Parsers/ParserAlterQuery.cpp b/src/Parsers/ParserAlterQuery.cpp index 28dbf781011..dbefb0cb966 100644 --- a/src/Parsers/ParserAlterQuery.cpp +++ b/src/Parsers/ParserAlterQuery.cpp @@ -9,8 +9,6 @@ #include #include #include -#include -#include #include #include #include diff --git a/src/Parsers/ParserCreateIndexQuery.cpp b/src/Parsers/ParserCreateIndexQuery.cpp index 2fa34696c58..ddefb3d37fb 100644 --- a/src/Parsers/ParserCreateIndexQuery.cpp +++ b/src/Parsers/ParserCreateIndexQuery.cpp @@ -21,7 +21,7 @@ bool ParserCreateIndexDeclaration::parseImpl(Pos & pos, ASTPtr & node, Expected ParserToken close_p(TokenType::ClosingRoundBracket); ParserOrderByExpressionList order_list_p; - ParserDataType data_type_p; + ParserFunction type_p; ParserExpression expression_p; ParserUnsignedInteger granularity_p; @@ -68,7 +68,7 @@ bool ParserCreateIndexDeclaration::parseImpl(Pos & pos, ASTPtr & node, Expected if (s_type.ignore(pos, expected)) { - if (!data_type_p.parse(pos, type, expected)) + if (!type_p.parse(pos, type, expected)) return false; } diff --git a/src/Parsers/ParserCreateQuery.cpp b/src/Parsers/ParserCreateQuery.cpp index fa232954cd6..3dba58546af 100644 --- a/src/Parsers/ParserCreateQuery.cpp +++ b/src/Parsers/ParserCreateQuery.cpp @@ -179,7 +179,7 @@ bool ParserIndexDeclaration::parseImpl(Pos & pos, ASTPtr & node, Expected & expe ParserKeyword s_granularity(Keyword::GRANULARITY); 
ParserIdentifier name_p; - ParserDataType data_type_p; + ParserFunction type_p; ParserExpression expression_p; ParserUnsignedInteger granularity_p; @@ -197,7 +197,7 @@ bool ParserIndexDeclaration::parseImpl(Pos & pos, ASTPtr & node, Expected & expe if (!s_type.ignore(pos, expected)) return false; - if (!data_type_p.parse(pos, type, expected)) + if (!type_p.parse(pos, type, expected)) return false; if (s_granularity.ignore(pos, expected)) From 8d7471f8bd2e0c6dc242231c4358448787e6c56f Mon Sep 17 00:00:00 2001 From: Alexander Gololobov Date: Sat, 20 Jul 2024 00:03:40 +0200 Subject: [PATCH 112/661] Fix for deadlock in getDDLWorker --- src/Interpreters/Context.cpp | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 94bcb88ed53..48878733a00 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -3490,18 +3490,22 @@ DDLWorker & Context::getDDLWorker() const if (shared->ddl_worker_startup_task) waitLoad(shared->ddl_worker_startup_task); // Just wait and do not prioritize, because it depends on all load and startup tasks - SharedLockGuard lock(shared->mutex); - if (!shared->ddl_worker) { - if (!hasZooKeeper()) - throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG, "There is no Zookeeper configuration in server config"); - - if (!hasDistributedDDL()) - throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG, "There is no DistributedDDL configuration in server config"); - - throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG, "DDL background thread is not initialized"); + /// Only acquire the lock for reading ddl_worker field. + /// hasZooKeeper() and hasDistributedDDL() acquire the same lock as well and double acquisition of the lock in shared mode can lead + /// to a deadlock if an exclusive lock attempt is made in the meantime by another thread. 
+ SharedLockGuard lock(shared->mutex); + if (shared->ddl_worker) + return *shared->ddl_worker; } - return *shared->ddl_worker; + + if (!hasZooKeeper()) + throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG, "There is no Zookeeper configuration in server config"); + + if (!hasDistributedDDL()) + throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG, "There is no DistributedDDL configuration in server config"); + + throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG, "DDL background thread is not initialized"); } zkutil::ZooKeeperPtr Context::getZooKeeper() const From bbbf2fec88a331281bf51d3a02bd3f476e3bf6ab Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 22 Jul 2024 08:37:54 +0200 Subject: [PATCH 113/661] Fix error --- src/Parsers/ParserCreateIndexQuery.cpp | 9 +++++++-- src/Parsers/ParserCreateQuery.cpp | 7 ++++++- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/src/Parsers/ParserCreateIndexQuery.cpp b/src/Parsers/ParserCreateIndexQuery.cpp index ddefb3d37fb..8a4c1c0b17a 100644 --- a/src/Parsers/ParserCreateIndexQuery.cpp +++ b/src/Parsers/ParserCreateIndexQuery.cpp @@ -7,9 +7,9 @@ #include #include #include -#include #include + namespace DB { @@ -69,7 +69,12 @@ bool ParserCreateIndexDeclaration::parseImpl(Pos & pos, ASTPtr & node, Expected if (s_type.ignore(pos, expected)) { if (!type_p.parse(pos, type, expected)) - return false; + { + if (ParserIdentifier().parse(pos, type, expected)) + type = makeASTFunction(type->as().name()); + else + return false; + } } if (s_granularity.ignore(pos, expected)) diff --git a/src/Parsers/ParserCreateQuery.cpp b/src/Parsers/ParserCreateQuery.cpp index 3dba58546af..bff5da4a536 100644 --- a/src/Parsers/ParserCreateQuery.cpp +++ b/src/Parsers/ParserCreateQuery.cpp @@ -198,7 +198,12 @@ bool ParserIndexDeclaration::parseImpl(Pos & pos, ASTPtr & node, Expected & expe return false; if (!type_p.parse(pos, type, expected)) - return false; + { + if (name_p.parse(pos, type, expected)) + type = makeASTFunction(type->as().name()); + else + return false; + } if (s_granularity.ignore(pos, expected)) { From 885acd3aa80d421e82f75150b4152e227ca0fba4 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 22 Jul 2024 09:42:20 +0200 Subject: [PATCH 114/661] Compatibility --- src/Parsers/ParserCreateIndexQuery.cpp | 3 +++ src/Parsers/ParserCreateQuery.cpp | 3 +++ 2 files changed, 6 insertions(+) diff --git a/src/Parsers/ParserCreateIndexQuery.cpp b/src/Parsers/ParserCreateIndexQuery.cpp index 8a4c1c0b17a..2761c99738b 100644 --- a/src/Parsers/ParserCreateIndexQuery.cpp +++ b/src/Parsers/ParserCreateIndexQuery.cpp @@ -71,7 +71,10 @@ bool ParserCreateIndexDeclaration::parseImpl(Pos & pos, ASTPtr & node, Expected if (!type_p.parse(pos, type, expected)) { if (ParserIdentifier().parse(pos, type, expected)) + { type = makeASTFunction(type->as().name()); + type->as().no_empty_args = true; + } else return false; } diff --git a/src/Parsers/ParserCreateQuery.cpp b/src/Parsers/ParserCreateQuery.cpp index bff5da4a536..9aaecd84f59 100644 --- a/src/Parsers/ParserCreateQuery.cpp +++ b/src/Parsers/ParserCreateQuery.cpp @@ -200,7 +200,10 @@ bool ParserIndexDeclaration::parseImpl(Pos & pos, ASTPtr & node, Expected & expe if (!type_p.parse(pos, type, expected)) { if (name_p.parse(pos, type, expected)) + { type = makeASTFunction(type->as().name()); + type->as().no_empty_args = true; + } else return false; } From c2ac13291f3bf201f7189bd36f2c9be7c06aa886 Mon Sep 17 00:00:00 2001 From: Michael Stetsyuk Date: Mon, 22 Jul 2024 09:06:13 +0100 Subject: [PATCH 115/661] fix tests --- 
tests/integration/test_recovery_time_metric/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_recovery_time_metric/test.py b/tests/integration/test_recovery_time_metric/test.py index 90155f81ba2..e4a44103b76 100644 --- a/tests/integration/test_recovery_time_metric/test.py +++ b/tests/integration/test_recovery_time_metric/test.py @@ -3,7 +3,7 @@ from helpers.cluster import ClickHouseCluster cluster = ClickHouseCluster(__file__) node = cluster.add_instance( - "node", main_configs=["configs/config.xml"], with_zookeeper=True + "node", main_configs=["configs/config.xml"], with_zookeeper=True, stay_alive=True, ) From d040e436f3a1f8594070b04ec10cbf7391f6994a Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Mon, 22 Jul 2024 08:18:58 +0000 Subject: [PATCH 116/661] Automatic style fix --- tests/integration/test_recovery_time_metric/test.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/integration/test_recovery_time_metric/test.py b/tests/integration/test_recovery_time_metric/test.py index e4a44103b76..4dad844b950 100644 --- a/tests/integration/test_recovery_time_metric/test.py +++ b/tests/integration/test_recovery_time_metric/test.py @@ -3,7 +3,10 @@ from helpers.cluster import ClickHouseCluster cluster = ClickHouseCluster(__file__) node = cluster.add_instance( - "node", main_configs=["configs/config.xml"], with_zookeeper=True, stay_alive=True, + "node", + main_configs=["configs/config.xml"], + with_zookeeper=True, + stay_alive=True, ) From 41218ad01889cce72569a840461b54dbf9f4b832 Mon Sep 17 00:00:00 2001 From: Nikita Fomichev Date: Mon, 22 Jul 2024 12:03:21 +0200 Subject: [PATCH 117/661] Stateless tests: add no-parallel tag for high-load tests --- .../01076_cache_dictionary_datarace_exception_ptr.sh | 2 +- .../0_stateless/01171_mv_select_insert_isolation_long.sh | 2 +- .../0_stateless/01301_aggregate_state_exception_memory_leak.sh | 2 +- .../0_stateless/01302_aggregate_state_exception_memory_leak.sh | 2 +- tests/queries/0_stateless/02372_data_race_in_avro.sh | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/queries/0_stateless/01076_cache_dictionary_datarace_exception_ptr.sh b/tests/queries/0_stateless/01076_cache_dictionary_datarace_exception_ptr.sh index dcd15718416..e003d2a26da 100755 --- a/tests/queries/0_stateless/01076_cache_dictionary_datarace_exception_ptr.sh +++ b/tests/queries/0_stateless/01076_cache_dictionary_datarace_exception_ptr.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: race +# Tags: race, no-parallel # This is a monkey test used to trigger sanitizers. 
diff --git a/tests/queries/0_stateless/01171_mv_select_insert_isolation_long.sh b/tests/queries/0_stateless/01171_mv_select_insert_isolation_long.sh
index 2ab7f883367..8344bb6f426 100755
--- a/tests/queries/0_stateless/01171_mv_select_insert_isolation_long.sh
+++ b/tests/queries/0_stateless/01171_mv_select_insert_isolation_long.sh
@@ -1,5 +1,5 @@
 #!/usr/bin/env bash
-# Tags: long, no-ordinary-database, no-debug
+# Tags: long, no-parallel, no-ordinary-database, no-debug
 # Test is too heavy, avoid parallel run in Flaky Check
 
 # shellcheck disable=SC2119
diff --git a/tests/queries/0_stateless/01301_aggregate_state_exception_memory_leak.sh b/tests/queries/0_stateless/01301_aggregate_state_exception_memory_leak.sh
index 47fe7a9c7d9..d74092d828d 100755
--- a/tests/queries/0_stateless/01301_aggregate_state_exception_memory_leak.sh
+++ b/tests/queries/0_stateless/01301_aggregate_state_exception_memory_leak.sh
@@ -1,5 +1,5 @@
 #!/usr/bin/env bash
-# Tags: no-fasttest
+# Tags: no-fasttest, no-parallel
 
 CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 # shellcheck source=../shell_config.sh
diff --git a/tests/queries/0_stateless/01302_aggregate_state_exception_memory_leak.sh b/tests/queries/0_stateless/01302_aggregate_state_exception_memory_leak.sh
index a521accb082..bbf2fd9177a 100755
--- a/tests/queries/0_stateless/01302_aggregate_state_exception_memory_leak.sh
+++ b/tests/queries/0_stateless/01302_aggregate_state_exception_memory_leak.sh
@@ -1,5 +1,5 @@
 #!/usr/bin/env bash
-# Tags: no-fasttest
+# Tags: no-fasttest, no-parallel
 
 CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 # shellcheck source=../shell_config.sh
diff --git a/tests/queries/0_stateless/02372_data_race_in_avro.sh b/tests/queries/0_stateless/02372_data_race_in_avro.sh
index 49c34e31923..50a7ae1e3c5 100755
--- a/tests/queries/0_stateless/02372_data_race_in_avro.sh
+++ b/tests/queries/0_stateless/02372_data_race_in_avro.sh
@@ -1,5 +1,5 @@
 #!/usr/bin/env bash
-# Tags: no-fasttest
+# Tags: no-fasttest, no-parallel
 
 CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 # shellcheck source=../shell_config.sh

From bb0b29f6e50e74098fcc8a9b83150998f1bc2601 Mon Sep 17 00:00:00 2001
From: Alexander Gololobov 
Date: Mon, 22 Jul 2024 12:11:56 +0200
Subject: [PATCH 118/661] Set writer_thread_id earlier, when new exclusive
 owner is waiting for existing readers to finish

---
 src/Common/SharedMutex.cpp | 6 ++++--
 src/Common/SharedMutex.h   | 2 +-
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/src/Common/SharedMutex.cpp b/src/Common/SharedMutex.cpp
index 7b00ef0b28b..2d63f8542b0 100644
--- a/src/Common/SharedMutex.cpp
+++ b/src/Common/SharedMutex.cpp
@@ -31,11 +31,13 @@ void SharedMutex::lock()
             break;
     }
 
+    /// The first step of acquiring the exclusive ownership is finished.
+    /// Now we just wait until all readers release the shared ownership.
+ writer_thread_id.store(getThreadId()); + value |= writers; while (value & readers) futexWaitLowerFetch(state, value); - - writer_thread_id.store(getThreadId()); } bool SharedMutex::try_lock() diff --git a/src/Common/SharedMutex.h b/src/Common/SharedMutex.h index c77c8765885..d2947645eca 100644 --- a/src/Common/SharedMutex.h +++ b/src/Common/SharedMutex.h @@ -38,7 +38,7 @@ private: alignas(64) std::atomic state; std::atomic waiters; - /// Is set while the lock is held in exclusive mode only to facilitate debugging + /// Is set while the lock is held (or is in the process of being acquired) in exclusive mode only to facilitate debugging std::atomic writer_thread_id; }; From 378502a331c60cc023e0c965611237cb5a3cfb47 Mon Sep 17 00:00:00 2001 From: Nikita Fomichev Date: Mon, 22 Jul 2024 12:54:05 +0200 Subject: [PATCH 119/661] Stateless tests: sync tests with private --- .../01651_lc_insert_tiny_log.reference | 12 +- .../0_stateless/01651_lc_insert_tiny_log.sql | 6 +- .../01753_direct_dictionary_simple_key.sql | 2 +- .../0_stateless/02372_analyzer_join.reference | 688 +++++++++--------- .../0_stateless/02372_analyzer_join.sql.j2 | 44 +- .../02373_analyzer_join_use_nulls.reference | 16 +- .../02373_analyzer_join_use_nulls.sql | 16 +- .../02992_all_columns_should_have_comment.sql | 6 +- 8 files changed, 397 insertions(+), 393 deletions(-) diff --git a/tests/queries/0_stateless/01651_lc_insert_tiny_log.reference b/tests/queries/0_stateless/01651_lc_insert_tiny_log.reference index 3da44c57b27..5cc8909b6c8 100644 --- a/tests/queries/0_stateless/01651_lc_insert_tiny_log.reference +++ b/tests/queries/0_stateless/01651_lc_insert_tiny_log.reference @@ -1,12 +1,12 @@ 10000000 10000000 1274991808 -20000000 -20000000 2549983616 +30000000 +30000000 3824991808 10000000 10000000 1274991808 -20000000 -20000000 2549983616 +30000000 +30000000 3824991808 10000000 10000000 1274991808 -20000000 -20000000 2549983616 +30000000 +30000000 3824991808 diff --git a/tests/queries/0_stateless/01651_lc_insert_tiny_log.sql b/tests/queries/0_stateless/01651_lc_insert_tiny_log.sql index d405bb01fd9..d11c9120c61 100644 --- a/tests/queries/0_stateless/01651_lc_insert_tiny_log.sql +++ b/tests/queries/0_stateless/01651_lc_insert_tiny_log.sql @@ -8,7 +8,7 @@ INSERT INTO perf_lc_num (num) SELECT toUInt8(number) FROM numbers(10000000); select sum(length(arr)) from perf_lc_num; select sum(length(arr)), sum(num) from perf_lc_num; -INSERT INTO perf_lc_num (num) SELECT toUInt8(number) FROM numbers(10000000); +INSERT INTO perf_lc_num (num) SELECT toUInt8(number) FROM numbers(10000000, 20000000); select sum(length(arr)) from perf_lc_num; select sum(length(arr)), sum(num) from perf_lc_num; @@ -23,7 +23,7 @@ INSERT INTO perf_lc_num (num) SELECT toUInt8(number) FROM numbers(10000000); select sum(length(arr)) from perf_lc_num; select sum(length(arr)), sum(num) from perf_lc_num; -INSERT INTO perf_lc_num (num) SELECT toUInt8(number) FROM numbers(10000000); +INSERT INTO perf_lc_num (num) SELECT toUInt8(number) FROM numbers(10000000, 20000000); select sum(length(arr)) from perf_lc_num; select sum(length(arr)), sum(num) from perf_lc_num; @@ -38,7 +38,7 @@ INSERT INTO perf_lc_num (num) SELECT toUInt8(number) FROM numbers(10000000); select sum(length(arr)) from perf_lc_num; select sum(length(arr)), sum(num) from perf_lc_num; -INSERT INTO perf_lc_num (num) SELECT toUInt8(number) FROM numbers(10000000); +INSERT INTO perf_lc_num (num) SELECT toUInt8(number) FROM numbers(10000000, 20000000); select sum(length(arr)) from perf_lc_num; select 
sum(length(arr)), sum(num) from perf_lc_num; diff --git a/tests/queries/0_stateless/01753_direct_dictionary_simple_key.sql b/tests/queries/0_stateless/01753_direct_dictionary_simple_key.sql index 86af09f391d..93ed3f93c4e 100644 --- a/tests/queries/0_stateless/01753_direct_dictionary_simple_key.sql +++ b/tests/queries/0_stateless/01753_direct_dictionary_simple_key.sql @@ -41,7 +41,7 @@ SELECT dictGetOrDefault('01753_dictionary_db.direct_dictionary_simple_key_simple SELECT 'dictHas'; SELECT dictHas('01753_dictionary_db.direct_dictionary_simple_key_simple_attributes', number) FROM system.numbers LIMIT 4; SELECT 'select all values as input stream'; -SELECT * FROM 01753_dictionary_db.direct_dictionary_simple_key_simple_attributes; +SELECT * FROM 01753_dictionary_db.direct_dictionary_simple_key_simple_attributes ORDER BY ALL; DROP DICTIONARY 01753_dictionary_db.direct_dictionary_simple_key_simple_attributes; DROP TABLE 01753_dictionary_db.simple_key_simple_attributes_source_table; diff --git a/tests/queries/0_stateless/02372_analyzer_join.reference b/tests/queries/0_stateless/02372_analyzer_join.reference index eefcb1e50dc..9204dded262 100644 --- a/tests/queries/0_stateless/02372_analyzer_join.reference +++ b/tests/queries/0_stateless/02372_analyzer_join.reference @@ -5,63 +5,63 @@ JOIN INNER SELECT 'JOIN ON without conditions'; JOIN ON without conditions SELECT test_table_join_1.id, test_table_join_1.value, test_table_join_2.id, test_table_join_2.value -FROM test_table_join_1 INNER JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id; +FROM test_table_join_1 INNER JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id ORDER BY ALL; 0 Join_1_Value_0 0 Join_2_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 SELECT '--'; -- SELECT t1.id, t1.value, t2.id, t2.value -FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id; +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id ORDER BY ALL; 0 Join_1_Value_0 0 Join_2_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 SELECT '--'; -- SELECT t1.id, test_table_join_1.id, t1.value, test_table_join_1.value, t2.id, test_table_join_2.id, t2.value, test_table_join_2.value -FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id; +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id ORDER BY ALL; 0 0 Join_1_Value_0 Join_1_Value_0 0 0 Join_2_Value_0 Join_2_Value_0 1 1 Join_1_Value_1 Join_1_Value_1 1 1 Join_2_Value_1 Join_2_Value_1 SELECT '--'; -- SELECT t1.value, t2.value -FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id; +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id ORDER BY ALL; Join_1_Value_0 Join_2_Value_0 Join_1_Value_1 Join_2_Value_1 -SELECT id FROM test_table_join_1 INNER JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id; -- { serverError AMBIGUOUS_IDENTIFIER } -SELECT value FROM test_table_join_1 INNER JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id; -- { serverError AMBIGUOUS_IDENTIFIER } +SELECT id FROM test_table_join_1 INNER JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id ORDER BY ALL; -- { serverError AMBIGUOUS_IDENTIFIER } +SELECT value FROM test_table_join_1 INNER JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id ORDER BY ALL; -- { serverError AMBIGUOUS_IDENTIFIER } SELECT 'JOIN ON with conditions'; JOIN ON with conditions SELECT t1.id, t1.value, t2.id, t2.value -FROM 
test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value = 'Join_1_Value_0'; +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value = 'Join_1_Value_0' ORDER BY ALL; 0 Join_1_Value_0 0 Join_2_Value_0 SELECT '--'; -- SELECT t1.id, t1.value, t2.id, t2.value -FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t2.value = 'Join_2_Value_0'; +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t2.value = 'Join_2_Value_0' ORDER BY ALL; 0 Join_1_Value_0 0 Join_2_Value_0 SELECT '--'; -- SELECT t1.id, t1.value, t2.id, t2.value -FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value = 'Join_1_Value_0' AND t2.value = 'Join_2_Value_0'; +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value = 'Join_1_Value_0' AND t2.value = 'Join_2_Value_0' ORDER BY ALL; 0 Join_1_Value_0 0 Join_2_Value_0 SELECT '--'; -- SELECT t1.id, t1.value, t2.id, t2.value -FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON toString(t1.id) = toString(t2.id) AND t1.value = 'Join_1_Value_0' AND t2.value = 'Join_2_Value_0'; +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON toString(t1.id) = toString(t2.id) AND t1.value = 'Join_1_Value_0' AND t2.value = 'Join_2_Value_0' ORDER BY ALL; 0 Join_1_Value_0 0 Join_2_Value_0 SELECT 'JOIN multiple clauses'; JOIN multiple clauses SELECT t1.id, t1.value, t2.id, t2.value -FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id OR t1.id = t2.id; +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id OR t1.id = t2.id ORDER BY ALL; 0 Join_1_Value_0 0 Join_2_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 SELECT 'JOIN expression aliases'; JOIN expression aliases -SELECT t1_id, t1.value, t2_id, t2.value FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON (t1.id AS t1_id) = (t2.id AS t2_id); +SELECT t1_id, t1.value, t2_id, t2.value FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON (t1.id AS t1_id) = (t2.id AS t2_id) ORDER BY ALL; 0 Join_1_Value_0 0 Join_2_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 SELECT '--'; -- -SELECT t1.id AS t1_id, t1.value, t2.id AS t2_id, t2.value FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1_id = t2_id; +SELECT t1.id AS t1_id, t1.value, t2.id AS t2_id, t2.value FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1_id = t2_id ORDER BY ALL; 0 Join_1_Value_0 0 Join_2_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 SELECT 'JOIN LEFT'; @@ -69,75 +69,75 @@ JOIN LEFT SELECT 'JOIN ON without conditions'; JOIN ON without conditions SELECT test_table_join_1.id, test_table_join_1.value, test_table_join_2.id, test_table_join_2.value -FROM test_table_join_1 LEFT JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id; +FROM test_table_join_1 LEFT JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id ORDER BY ALL; 0 Join_1_Value_0 0 Join_2_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 2 Join_1_Value_2 0 SELECT '--'; -- SELECT t1.id, t1.value, t2.id, t2.value -FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON t1.id = t2.id; +FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON t1.id = t2.id ORDER BY ALL; 0 Join_1_Value_0 0 Join_2_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 2 Join_1_Value_2 0 SELECT '--'; -- SELECT t1.id, test_table_join_1.id, t1.value, test_table_join_1.value, t2.id, 
test_table_join_2.id, t2.value, test_table_join_2.value -FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON t1.id = t2.id; +FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON t1.id = t2.id ORDER BY ALL; 0 0 Join_1_Value_0 Join_1_Value_0 0 0 Join_2_Value_0 Join_2_Value_0 1 1 Join_1_Value_1 Join_1_Value_1 1 1 Join_2_Value_1 Join_2_Value_1 2 2 Join_1_Value_2 Join_1_Value_2 0 0 SELECT '--'; -- SELECT t1.value, t2.value -FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON t1.id = t2.id; +FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON t1.id = t2.id ORDER BY ALL; Join_1_Value_0 Join_2_Value_0 Join_1_Value_1 Join_2_Value_1 Join_1_Value_2 -SELECT id FROM test_table_join_1 LEFT JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id; -- { serverError AMBIGUOUS_IDENTIFIER } -SELECT value FROM test_table_join_1 LEFT JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id; -- { serverError AMBIGUOUS_IDENTIFIER } +SELECT id FROM test_table_join_1 LEFT JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id ORDER BY ALL; -- { serverError AMBIGUOUS_IDENTIFIER } +SELECT value FROM test_table_join_1 LEFT JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id ORDER BY ALL; -- { serverError AMBIGUOUS_IDENTIFIER } SELECT 'JOIN ON with conditions'; JOIN ON with conditions SELECT t1.id, t1.value, t2.id, t2.value -FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value = 'Join_1_Value_0'; +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value = 'Join_1_Value_0' ORDER BY ALL; 0 Join_1_Value_0 0 Join_2_Value_0 SELECT '--'; -- SELECT t1.id, t1.value, t2.id, t2.value -FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t2.value = 'Join_2_Value_0'; +FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t2.value = 'Join_2_Value_0' ORDER BY ALL; 0 Join_1_Value_0 0 Join_2_Value_0 1 Join_1_Value_1 0 2 Join_1_Value_2 0 SELECT '--'; -- SELECT t1.id, t1.value, t2.id, t2.value -FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value = 'Join_1_Value_0' AND t2.value = 'Join_2_Value_0'; +FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value = 'Join_1_Value_0' AND t2.value = 'Join_2_Value_0' ORDER BY ALL; 0 Join_1_Value_0 0 Join_2_Value_0 1 Join_1_Value_1 0 2 Join_1_Value_2 0 SELECT '--'; -- SELECT t1.id, t1.value, t2.id, t2.value -FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON toString(t1.id) = toString(t2.id) AND t1.value = 'Join_1_Value_0' AND t2.value = 'Join_2_Value_0'; +FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON toString(t1.id) = toString(t2.id) AND t1.value = 'Join_1_Value_0' AND t2.value = 'Join_2_Value_0' ORDER BY ALL; 0 Join_1_Value_0 0 Join_2_Value_0 1 Join_1_Value_1 0 2 Join_1_Value_2 0 SELECT 'JOIN multiple clauses'; JOIN multiple clauses SELECT t1.id, t1.value, t2.id, t2.value -FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON t1.id = t2.id OR t1.id = t2.id; +FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON t1.id = t2.id OR t1.id = t2.id ORDER BY ALL; 0 Join_1_Value_0 0 Join_2_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 2 Join_1_Value_2 0 SELECT 'JOIN expression aliases'; JOIN expression aliases -SELECT t1_id, t1.value, t2_id, t2.value FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON (t1.id AS 
t1_id) = (t2.id AS t2_id); +SELECT t1_id, t1.value, t2_id, t2.value FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON (t1.id AS t1_id) = (t2.id AS t2_id) ORDER BY ALL; 0 Join_1_Value_0 0 Join_2_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 2 Join_1_Value_2 0 SELECT '--'; -- -SELECT t1.id AS t1_id, t1.value, t2.id AS t2_id, t2.value FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON t1_id = t2_id; +SELECT t1.id AS t1_id, t1.value, t2.id AS t2_id, t2.value FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON t1_id = t2_id ORDER BY ALL; 0 Join_1_Value_0 0 Join_2_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 2 Join_1_Value_2 0 @@ -146,182 +146,182 @@ JOIN RIGHT SELECT 'JOIN ON without conditions'; JOIN ON without conditions SELECT test_table_join_1.id, test_table_join_1.value, test_table_join_2.id, test_table_join_2.value -FROM test_table_join_1 RIGHT JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id; +FROM test_table_join_1 RIGHT JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id ORDER BY ALL; +0 3 Join_2_Value_3 0 Join_1_Value_0 0 Join_2_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 -0 3 Join_2_Value_3 SELECT '--'; -- SELECT t1.id, t1.value, t2.id, t2.value -FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON t1.id = t2.id; +FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON t1.id = t2.id ORDER BY ALL; +0 3 Join_2_Value_3 0 Join_1_Value_0 0 Join_2_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 -0 3 Join_2_Value_3 SELECT '--'; -- SELECT t1.id, test_table_join_1.id, t1.value, test_table_join_1.value, t2.id, test_table_join_2.id, t2.value, test_table_join_2.value -FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON t1.id = t2.id; +FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON t1.id = t2.id ORDER BY ALL; +0 0 3 3 Join_2_Value_3 Join_2_Value_3 0 0 Join_1_Value_0 Join_1_Value_0 0 0 Join_2_Value_0 Join_2_Value_0 1 1 Join_1_Value_1 Join_1_Value_1 1 1 Join_2_Value_1 Join_2_Value_1 -0 0 3 3 Join_2_Value_3 Join_2_Value_3 SELECT '--'; -- SELECT t1.value, t2.value -FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON t1.id = t2.id; +FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON t1.id = t2.id ORDER BY ALL; + Join_2_Value_3 Join_1_Value_0 Join_2_Value_0 Join_1_Value_1 Join_2_Value_1 - Join_2_Value_3 -SELECT id FROM test_table_join_1 RIGHT JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id; -- { serverError AMBIGUOUS_IDENTIFIER } -SELECT value FROM test_table_join_1 RIGHT JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id; -- { serverError AMBIGUOUS_IDENTIFIER } +SELECT id FROM test_table_join_1 RIGHT JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id ORDER BY ALL; -- { serverError AMBIGUOUS_IDENTIFIER } +SELECT value FROM test_table_join_1 RIGHT JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id ORDER BY ALL; -- { serverError AMBIGUOUS_IDENTIFIER } SELECT 'JOIN ON with conditions'; JOIN ON with conditions SELECT t1.id, t1.value, t2.id, t2.value -FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value = 'Join_1_Value_0'; +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value = 'Join_1_Value_0' ORDER BY ALL; 0 Join_1_Value_0 0 Join_2_Value_0 SELECT '--'; -- SELECT t1.id, t1.value, t2.id, t2.value -FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t2.value = 
'Join_2_Value_0'; -0 Join_1_Value_0 0 Join_2_Value_0 +FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t2.value = 'Join_2_Value_0' ORDER BY ALL; 0 1 Join_2_Value_1 0 3 Join_2_Value_3 +0 Join_1_Value_0 0 Join_2_Value_0 SELECT '--'; -- SELECT t1.id, t1.value, t2.id, t2.value -FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value = 'Join_1_Value_0' AND t2.value = 'Join_2_Value_0'; -0 Join_1_Value_0 0 Join_2_Value_0 +FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value = 'Join_1_Value_0' AND t2.value = 'Join_2_Value_0' ORDER BY ALL; 0 1 Join_2_Value_1 0 3 Join_2_Value_3 +0 Join_1_Value_0 0 Join_2_Value_0 SELECT '--'; -- SELECT t1.id, t1.value, t2.id, t2.value -FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON toString(t1.id) = toString(t2.id) AND t1.value = 'Join_1_Value_0' AND t2.value = 'Join_2_Value_0'; -0 Join_1_Value_0 0 Join_2_Value_0 +FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON toString(t1.id) = toString(t2.id) AND t1.value = 'Join_1_Value_0' AND t2.value = 'Join_2_Value_0' ORDER BY ALL; 0 1 Join_2_Value_1 0 3 Join_2_Value_3 +0 Join_1_Value_0 0 Join_2_Value_0 SELECT 'JOIN multiple clauses'; JOIN multiple clauses SELECT t1.id, t1.value, t2.id, t2.value -FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON t1.id = t2.id OR t1.id = t2.id; +FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON t1.id = t2.id OR t1.id = t2.id ORDER BY ALL; +0 3 Join_2_Value_3 0 Join_1_Value_0 0 Join_2_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 -0 3 Join_2_Value_3 SELECT 'JOIN expression aliases'; JOIN expression aliases -SELECT t1_id, t1.value, t2_id, t2.value FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON (t1.id AS t1_id) = (t2.id AS t2_id); +SELECT t1_id, t1.value, t2_id, t2.value FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON (t1.id AS t1_id) = (t2.id AS t2_id) ORDER BY ALL; +0 3 Join_2_Value_3 0 Join_1_Value_0 0 Join_2_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 -0 3 Join_2_Value_3 SELECT '--'; -- -SELECT t1.id AS t1_id, t1.value, t2.id AS t2_id, t2.value FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON t1_id = t2_id; +SELECT t1.id AS t1_id, t1.value, t2.id AS t2_id, t2.value FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON t1_id = t2_id ORDER BY ALL; +0 3 Join_2_Value_3 0 Join_1_Value_0 0 Join_2_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 -0 3 Join_2_Value_3 SELECT 'JOIN FULL'; JOIN FULL SELECT 'JOIN ON without conditions'; JOIN ON without conditions SELECT test_table_join_1.id, test_table_join_1.value, test_table_join_2.id, test_table_join_2.value -FROM test_table_join_1 FULL JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id; +FROM test_table_join_1 FULL JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id ORDER BY ALL; +0 3 Join_2_Value_3 0 Join_1_Value_0 0 Join_2_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 2 Join_1_Value_2 0 -0 3 Join_2_Value_3 SELECT '--'; -- SELECT t1.id, t1.value, t2.id, t2.value -FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON t1.id = t2.id; +FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON t1.id = t2.id ORDER BY ALL; +0 3 Join_2_Value_3 0 Join_1_Value_0 0 Join_2_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 2 Join_1_Value_2 0 -0 3 Join_2_Value_3 SELECT '--'; -- SELECT t1.id, test_table_join_1.id, t1.value, test_table_join_1.value, t2.id, test_table_join_2.id, 
t2.value, test_table_join_2.value -FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON t1.id = t2.id; +FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON t1.id = t2.id ORDER BY ALL; +0 0 3 3 Join_2_Value_3 Join_2_Value_3 0 0 Join_1_Value_0 Join_1_Value_0 0 0 Join_2_Value_0 Join_2_Value_0 1 1 Join_1_Value_1 Join_1_Value_1 1 1 Join_2_Value_1 Join_2_Value_1 2 2 Join_1_Value_2 Join_1_Value_2 0 0 -0 0 3 3 Join_2_Value_3 Join_2_Value_3 SELECT '--'; -- SELECT t1.value, t2.value -FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON t1.id = t2.id; +FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON t1.id = t2.id ORDER BY ALL; + Join_2_Value_3 Join_1_Value_0 Join_2_Value_0 Join_1_Value_1 Join_2_Value_1 Join_1_Value_2 - Join_2_Value_3 -SELECT id FROM test_table_join_1 FULL JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id; -- { serverError AMBIGUOUS_IDENTIFIER } -SELECT value FROM test_table_join_1 FULL JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id; -- { serverError AMBIGUOUS_IDENTIFIER } +SELECT id FROM test_table_join_1 FULL JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id ORDER BY ALL; -- { serverError AMBIGUOUS_IDENTIFIER } +SELECT value FROM test_table_join_1 FULL JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id ORDER BY ALL; -- { serverError AMBIGUOUS_IDENTIFIER } SELECT 'JOIN ON with conditions'; JOIN ON with conditions SELECT t1.id, t1.value, t2.id, t2.value -FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value = 'Join_1_Value_0'; +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value = 'Join_1_Value_0' ORDER BY ALL; 0 Join_1_Value_0 0 Join_2_Value_0 SELECT '--'; -- SELECT t1.id, t1.value, t2.id, t2.value -FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t2.value = 'Join_2_Value_0'; +FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t2.value = 'Join_2_Value_0' ORDER BY ALL; +0 1 Join_2_Value_1 +0 3 Join_2_Value_3 0 Join_1_Value_0 0 Join_2_Value_0 1 Join_1_Value_1 0 2 Join_1_Value_2 0 -0 1 Join_2_Value_1 -0 3 Join_2_Value_3 SELECT '--'; -- SELECT t1.id, t1.value, t2.id, t2.value -FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value = 'Join_1_Value_0' AND t2.value = 'Join_2_Value_0'; +FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value = 'Join_1_Value_0' AND t2.value = 'Join_2_Value_0' ORDER BY ALL; +0 1 Join_2_Value_1 +0 3 Join_2_Value_3 0 Join_1_Value_0 0 Join_2_Value_0 1 Join_1_Value_1 0 2 Join_1_Value_2 0 -0 1 Join_2_Value_1 -0 3 Join_2_Value_3 SELECT '--'; -- SELECT t1.id, t1.value, t2.id, t2.value -FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON toString(t1.id) = toString(t2.id) AND t1.value = 'Join_1_Value_0' AND t2.value = 'Join_2_Value_0'; +FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON toString(t1.id) = toString(t2.id) AND t1.value = 'Join_1_Value_0' AND t2.value = 'Join_2_Value_0' ORDER BY ALL; +0 1 Join_2_Value_1 +0 3 Join_2_Value_3 0 Join_1_Value_0 0 Join_2_Value_0 1 Join_1_Value_1 0 2 Join_1_Value_2 0 -0 1 Join_2_Value_1 -0 3 Join_2_Value_3 SELECT 'JOIN multiple clauses'; JOIN multiple clauses SELECT t1.id, t1.value, t2.id, t2.value -FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON t1.id = t2.id OR t1.id = t2.id; +FROM test_table_join_1 AS t1 FULL JOIN 
test_table_join_2 AS t2 ON t1.id = t2.id OR t1.id = t2.id ORDER BY ALL; +0 3 Join_2_Value_3 0 Join_1_Value_0 0 Join_2_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 2 Join_1_Value_2 0 -0 3 Join_2_Value_3 SELECT 'JOIN expression aliases'; JOIN expression aliases -SELECT t1_id, t1.value, t2_id, t2.value FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON (t1.id AS t1_id) = (t2.id AS t2_id); +SELECT t1_id, t1.value, t2_id, t2.value FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON (t1.id AS t1_id) = (t2.id AS t2_id) ORDER BY ALL; +0 3 Join_2_Value_3 0 Join_1_Value_0 0 Join_2_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 2 Join_1_Value_2 0 -0 3 Join_2_Value_3 SELECT '--'; -- -SELECT t1.id AS t1_id, t1.value, t2.id AS t2_id, t2.value FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON t1_id = t2_id; +SELECT t1.id AS t1_id, t1.value, t2.id AS t2_id, t2.value FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON t1_id = t2_id ORDER BY ALL; +0 3 Join_2_Value_3 0 Join_1_Value_0 0 Join_2_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 2 Join_1_Value_2 0 -0 3 Join_2_Value_3 SELECT 'First JOIN INNER second JOIN INNER'; First JOIN INNER second JOIN INNER SELECT 'JOIN ON without conditions'; JOIN ON without conditions SELECT test_table_join_1.id, test_table_join_1.value, test_table_join_2.id, test_table_join_2.value, test_table_join_3.id, test_table_join_3.value FROM test_table_join_1 INNER JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id -INNER JOIN test_table_join_3 ON test_table_join_2.id = test_table_join_3.id; +INNER JOIN test_table_join_3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 SELECT '--'; -- SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id -INNER JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +INNER JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 SELECT '--'; @@ -329,48 +329,48 @@ SELECT '--'; SELECT t1.id, test_table_join_1.id, t1.value, test_table_join_1.value, t2.id, test_table_join_2.id, t2.value, test_table_join_2.value, t3.id, test_table_join_3.id, t3.value, test_table_join_3.value FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id -INNER JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +INNER JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; 0 0 Join_1_Value_0 Join_1_Value_0 0 0 Join_2_Value_0 Join_2_Value_0 0 0 Join_3_Value_0 Join_3_Value_0 1 1 Join_1_Value_1 Join_1_Value_1 1 1 Join_2_Value_1 Join_2_Value_1 1 1 Join_3_Value_1 Join_3_Value_1 SELECT '--'; -- SELECT t1.value, t2.value, t3.value FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id -INNER JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +INNER JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 SELECT 'JOIN ON with conditions'; JOIN ON with conditions SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value FROM 
test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' -INNER JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0'; +INNER JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' ORDER BY ALL; 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 SELECT '--'; -- SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' AND t2.value == 'Join_2_Value_0' -INNER JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' AND t3.value == 'Join_3_Value_0'; +INNER JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' AND t3.value == 'Join_3_Value_0' ORDER BY ALL; 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 SELECT 'JOIN multiple clauses'; JOIN multiple clauses SELECT t1.value, t2.value, t3.value FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id OR t1.id = t2.id -INNER JOIN test_table_join_3 AS t3 ON t2.id = t3.id OR t3.id = t2.id; +INNER JOIN test_table_join_3 AS t3 ON t2.id = t3.id OR t3.id = t2.id ORDER BY ALL; Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 SELECT 'JOIN expression aliases'; JOIN expression aliases SELECT t1_id, t1.value, t2_id, t2.value, t3_id, t3.value FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON (t1.id AS t1_id) = (t2.id AS t2_id) -INNER JOIN test_table_join_3 AS t3 ON t2_id = (t3.id AS t3_id); +INNER JOIN test_table_join_3 AS t3 ON t2_id = (t3.id AS t3_id) ORDER BY ALL; 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 SELECT '--'; -- SELECT t1.id AS t1_id, t1.value, t2.id AS t2_id, t2.value, t3.id AS t3_id, t3.value FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1_id = t2_id -INNER JOIN test_table_join_3 AS t3 ON t2_id = t3_id; +INNER JOIN test_table_join_3 AS t3 ON t2_id = t3_id ORDER BY ALL; 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 SELECT 'First JOIN INNER second JOIN LEFT'; @@ -379,14 +379,14 @@ SELECT 'JOIN ON without conditions'; JOIN ON without conditions SELECT test_table_join_1.id, test_table_join_1.value, test_table_join_2.id, test_table_join_2.value, test_table_join_3.id, test_table_join_3.value FROM test_table_join_1 INNER JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id -LEFT JOIN test_table_join_3 ON test_table_join_2.id = test_table_join_3.id; +LEFT JOIN test_table_join_3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 SELECT '--'; -- SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id -LEFT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +LEFT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 SELECT '--'; @@ -394,48 +394,48 @@ SELECT '--'; SELECT t1.id, test_table_join_1.id, t1.value, test_table_join_1.value, t2.id, test_table_join_2.id, t2.value, test_table_join_2.value, t3.id, test_table_join_3.id, t3.value, test_table_join_3.value FROM test_table_join_1 AS 
t1 INNER JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id -LEFT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +LEFT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; 0 0 Join_1_Value_0 Join_1_Value_0 0 0 Join_2_Value_0 Join_2_Value_0 0 0 Join_3_Value_0 Join_3_Value_0 1 1 Join_1_Value_1 Join_1_Value_1 1 1 Join_2_Value_1 Join_2_Value_1 1 1 Join_3_Value_1 Join_3_Value_1 SELECT '--'; -- SELECT t1.value, t2.value, t3.value FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id -LEFT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +LEFT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 SELECT 'JOIN ON with conditions'; JOIN ON with conditions SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' -LEFT JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0'; +LEFT JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' ORDER BY ALL; 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 SELECT '--'; -- SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' AND t2.value == 'Join_2_Value_0' -LEFT JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' AND t3.value == 'Join_3_Value_0'; +LEFT JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' AND t3.value == 'Join_3_Value_0' ORDER BY ALL; 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 SELECT 'JOIN multiple clauses'; JOIN multiple clauses SELECT t1.value, t2.value, t3.value FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id OR t1.id = t2.id -LEFT JOIN test_table_join_3 AS t3 ON t2.id = t3.id OR t3.id = t2.id; +LEFT JOIN test_table_join_3 AS t3 ON t2.id = t3.id OR t3.id = t2.id ORDER BY ALL; Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 SELECT 'JOIN expression aliases'; JOIN expression aliases SELECT t1_id, t1.value, t2_id, t2.value, t3_id, t3.value FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON (t1.id AS t1_id) = (t2.id AS t2_id) -LEFT JOIN test_table_join_3 AS t3 ON t2_id = (t3.id AS t3_id); +LEFT JOIN test_table_join_3 AS t3 ON t2_id = (t3.id AS t3_id) ORDER BY ALL; 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 SELECT '--'; -- SELECT t1.id AS t1_id, t1.value, t2.id AS t2_id, t2.value, t3.id AS t3_id, t3.value FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1_id = t2_id -LEFT JOIN test_table_join_3 AS t3 ON t2_id = t3_id; +LEFT JOIN test_table_join_3 AS t3 ON t2_id = t3_id ORDER BY ALL; 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 SELECT 'First JOIN INNER second JOIN RIGHT'; @@ -444,159 +444,159 @@ SELECT 'JOIN ON without conditions'; JOIN ON without conditions SELECT test_table_join_1.id, test_table_join_1.value, test_table_join_2.id, test_table_join_2.value, test_table_join_3.id, test_table_join_3.value FROM test_table_join_1 INNER JOIN test_table_join_2 ON test_table_join_1.id = 
test_table_join_2.id -RIGHT JOIN test_table_join_3 ON test_table_join_2.id = test_table_join_3.id; +RIGHT JOIN test_table_join_3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; +0 0 4 Join_3_Value_4 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 -0 0 4 Join_3_Value_4 SELECT '--'; -- SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id -RIGHT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +RIGHT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; +0 0 4 Join_3_Value_4 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 -0 0 4 Join_3_Value_4 SELECT '--'; -- SELECT t1.id, test_table_join_1.id, t1.value, test_table_join_1.value, t2.id, test_table_join_2.id, t2.value, test_table_join_2.value, t3.id, test_table_join_3.id, t3.value, test_table_join_3.value FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id -RIGHT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +RIGHT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; +0 0 0 0 4 4 Join_3_Value_4 Join_3_Value_4 0 0 Join_1_Value_0 Join_1_Value_0 0 0 Join_2_Value_0 Join_2_Value_0 0 0 Join_3_Value_0 Join_3_Value_0 1 1 Join_1_Value_1 Join_1_Value_1 1 1 Join_2_Value_1 Join_2_Value_1 1 1 Join_3_Value_1 Join_3_Value_1 -0 0 0 0 4 4 Join_3_Value_4 Join_3_Value_4 SELECT '--'; -- SELECT t1.value, t2.value, t3.value FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id -RIGHT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +RIGHT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; + Join_3_Value_4 Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 - Join_3_Value_4 SELECT 'JOIN ON with conditions'; JOIN ON with conditions SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' -RIGHT JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0'; -0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 -0 0 4 Join_3_Value_4 +RIGHT JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' ORDER BY ALL; 0 0 1 Join_3_Value_1 +0 0 4 Join_3_Value_4 +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 SELECT '--'; -- SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' AND t2.value == 'Join_2_Value_0' -RIGHT JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' AND t3.value == 'Join_3_Value_0'; -0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +RIGHT JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' AND t3.value == 'Join_3_Value_0' ORDER BY ALL; 0 0 1 Join_3_Value_1 0 0 4 Join_3_Value_4 +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 SELECT 'JOIN multiple clauses'; JOIN multiple clauses SELECT t1.value, t2.value, t3.value FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id OR t1.id = t2.id -RIGHT JOIN test_table_join_3 AS t3 ON t2.id = 
t3.id OR t3.id = t2.id; +RIGHT JOIN test_table_join_3 AS t3 ON t2.id = t3.id OR t3.id = t2.id ORDER BY ALL; + Join_3_Value_4 Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 - Join_3_Value_4 SELECT 'JOIN expression aliases'; JOIN expression aliases SELECT t1_id, t1.value, t2_id, t2.value, t3_id, t3.value FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON (t1.id AS t1_id) = (t2.id AS t2_id) -RIGHT JOIN test_table_join_3 AS t3 ON t2_id = (t3.id AS t3_id); +RIGHT JOIN test_table_join_3 AS t3 ON t2_id = (t3.id AS t3_id) ORDER BY ALL; +0 0 4 Join_3_Value_4 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 -0 0 4 Join_3_Value_4 SELECT '--'; -- SELECT t1.id AS t1_id, t1.value, t2.id AS t2_id, t2.value, t3.id AS t3_id, t3.value FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1_id = t2_id -RIGHT JOIN test_table_join_3 AS t3 ON t2_id = t3_id; +RIGHT JOIN test_table_join_3 AS t3 ON t2_id = t3_id ORDER BY ALL; +0 0 4 Join_3_Value_4 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 -0 0 4 Join_3_Value_4 SELECT 'First JOIN INNER second JOIN FULL'; First JOIN INNER second JOIN FULL SELECT 'JOIN ON without conditions'; JOIN ON without conditions SELECT test_table_join_1.id, test_table_join_1.value, test_table_join_2.id, test_table_join_2.value, test_table_join_3.id, test_table_join_3.value FROM test_table_join_1 INNER JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id -FULL JOIN test_table_join_3 ON test_table_join_2.id = test_table_join_3.id; +FULL JOIN test_table_join_3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; +0 0 4 Join_3_Value_4 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 -0 0 4 Join_3_Value_4 SELECT '--'; -- SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id -FULL JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +FULL JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; +0 0 4 Join_3_Value_4 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 -0 0 4 Join_3_Value_4 SELECT '--'; -- SELECT t1.id, test_table_join_1.id, t1.value, test_table_join_1.value, t2.id, test_table_join_2.id, t2.value, test_table_join_2.value, t3.id, test_table_join_3.id, t3.value, test_table_join_3.value FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id -FULL JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +FULL JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; +0 0 0 0 4 4 Join_3_Value_4 Join_3_Value_4 0 0 Join_1_Value_0 Join_1_Value_0 0 0 Join_2_Value_0 Join_2_Value_0 0 0 Join_3_Value_0 Join_3_Value_0 1 1 Join_1_Value_1 Join_1_Value_1 1 1 Join_2_Value_1 Join_2_Value_1 1 1 Join_3_Value_1 Join_3_Value_1 -0 0 0 0 4 4 Join_3_Value_4 Join_3_Value_4 SELECT '--'; -- SELECT t1.value, t2.value, t3.value FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id -FULL JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +FULL JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; + Join_3_Value_4 
Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 - Join_3_Value_4 SELECT 'JOIN ON with conditions'; JOIN ON with conditions SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' -FULL JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0'; -0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 -0 0 4 Join_3_Value_4 +FULL JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' ORDER BY ALL; 0 0 1 Join_3_Value_1 +0 0 4 Join_3_Value_4 +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 SELECT '--'; -- SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' AND t2.value == 'Join_2_Value_0' -FULL JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' AND t3.value == 'Join_3_Value_0'; -0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +FULL JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' AND t3.value == 'Join_3_Value_0' ORDER BY ALL; 0 0 1 Join_3_Value_1 0 0 4 Join_3_Value_4 +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 SELECT 'JOIN multiple clauses'; JOIN multiple clauses SELECT t1.value, t2.value, t3.value FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id OR t1.id = t2.id -FULL JOIN test_table_join_3 AS t3 ON t2.id = t3.id OR t3.id = t2.id; +FULL JOIN test_table_join_3 AS t3 ON t2.id = t3.id OR t3.id = t2.id ORDER BY ALL; + Join_3_Value_4 Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 - Join_3_Value_4 SELECT 'JOIN expression aliases'; JOIN expression aliases SELECT t1_id, t1.value, t2_id, t2.value, t3_id, t3.value FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON (t1.id AS t1_id) = (t2.id AS t2_id) -FULL JOIN test_table_join_3 AS t3 ON t2_id = (t3.id AS t3_id); +FULL JOIN test_table_join_3 AS t3 ON t2_id = (t3.id AS t3_id) ORDER BY ALL; +0 0 4 Join_3_Value_4 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 -0 0 4 Join_3_Value_4 SELECT '--'; -- SELECT t1.id AS t1_id, t1.value, t2.id AS t2_id, t2.value, t3.id AS t3_id, t3.value FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1_id = t2_id -FULL JOIN test_table_join_3 AS t3 ON t2_id = t3_id; +FULL JOIN test_table_join_3 AS t3 ON t2_id = t3_id ORDER BY ALL; +0 0 4 Join_3_Value_4 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 -0 0 4 Join_3_Value_4 SELECT 'First JOIN LEFT second JOIN INNER'; First JOIN LEFT second JOIN INNER SELECT 'JOIN ON without conditions'; JOIN ON without conditions SELECT test_table_join_1.id, test_table_join_1.value, test_table_join_2.id, test_table_join_2.value, test_table_join_3.id, test_table_join_3.value FROM test_table_join_1 LEFT JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id -INNER JOIN test_table_join_3 ON test_table_join_2.id = test_table_join_3.id; +INNER JOIN test_table_join_3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 2 Join_1_Value_2 0 0 Join_3_Value_0 @@ -604,7 +604,7 @@ SELECT '--'; -- SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value FROM test_table_join_1 AS t1 LEFT JOIN 
test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id -INNER JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +INNER JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 2 Join_1_Value_2 0 0 Join_3_Value_0 @@ -613,7 +613,7 @@ SELECT '--'; SELECT t1.id, test_table_join_1.id, t1.value, test_table_join_1.value, t2.id, test_table_join_2.id, t2.value, test_table_join_2.value, t3.id, test_table_join_3.id, t3.value, test_table_join_3.value FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id -INNER JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +INNER JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; 0 0 Join_1_Value_0 Join_1_Value_0 0 0 Join_2_Value_0 Join_2_Value_0 0 0 Join_3_Value_0 Join_3_Value_0 1 1 Join_1_Value_1 Join_1_Value_1 1 1 Join_2_Value_1 Join_2_Value_1 1 1 Join_3_Value_1 Join_3_Value_1 2 2 Join_1_Value_2 Join_1_Value_2 0 0 0 0 Join_3_Value_0 Join_3_Value_0 @@ -621,7 +621,7 @@ SELECT '--'; -- SELECT t1.value, t2.value, t3.value FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id -INNER JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +INNER JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 Join_1_Value_2 Join_3_Value_0 @@ -629,20 +629,20 @@ SELECT 'JOIN ON with conditions'; JOIN ON with conditions SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' -INNER JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0'; +INNER JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' ORDER BY ALL; 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 SELECT '--'; -- SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' AND t2.value == 'Join_2_Value_0' -INNER JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' AND t3.value == 'Join_3_Value_0'; +INNER JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' AND t3.value == 'Join_3_Value_0' ORDER BY ALL; 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 SELECT 'JOIN multiple clauses'; JOIN multiple clauses SELECT t1.value, t2.value, t3.value FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON t1.id = t2.id OR t1.id = t2.id -INNER JOIN test_table_join_3 AS t3 ON t2.id = t3.id OR t3.id = t2.id; +INNER JOIN test_table_join_3 AS t3 ON t2.id = t3.id OR t3.id = t2.id ORDER BY ALL; Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 Join_1_Value_2 Join_3_Value_0 @@ -650,7 +650,7 @@ SELECT 'JOIN expression aliases'; JOIN expression aliases SELECT t1_id, t1.value, t2_id, t2.value, t3_id, t3.value FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON (t1.id AS t1_id) = (t2.id AS t2_id) -INNER JOIN test_table_join_3 AS t3 ON t2_id = (t3.id AS t3_id); +INNER JOIN test_table_join_3 AS t3 ON t2_id = (t3.id AS t3_id) ORDER BY ALL; 0 Join_1_Value_0 0 
Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 2 Join_1_Value_2 0 0 Join_3_Value_0 @@ -658,7 +658,7 @@ SELECT '--'; -- SELECT t1.id AS t1_id, t1.value, t2.id AS t2_id, t2.value, t3.id AS t3_id, t3.value FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON t1_id = t2_id -INNER JOIN test_table_join_3 AS t3 ON t2_id = t3_id; +INNER JOIN test_table_join_3 AS t3 ON t2_id = t3_id ORDER BY ALL; 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 2 Join_1_Value_2 0 0 Join_3_Value_0 @@ -668,7 +668,7 @@ SELECT 'JOIN ON without conditions'; JOIN ON without conditions SELECT test_table_join_1.id, test_table_join_1.value, test_table_join_2.id, test_table_join_2.value, test_table_join_3.id, test_table_join_3.value FROM test_table_join_1 LEFT JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id -LEFT JOIN test_table_join_3 ON test_table_join_2.id = test_table_join_3.id; +LEFT JOIN test_table_join_3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 2 Join_1_Value_2 0 0 Join_3_Value_0 @@ -676,7 +676,7 @@ SELECT '--'; -- SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id -LEFT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +LEFT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 2 Join_1_Value_2 0 0 Join_3_Value_0 @@ -685,7 +685,7 @@ SELECT '--'; SELECT t1.id, test_table_join_1.id, t1.value, test_table_join_1.value, t2.id, test_table_join_2.id, t2.value, test_table_join_2.value, t3.id, test_table_join_3.id, t3.value, test_table_join_3.value FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id -LEFT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +LEFT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; 0 0 Join_1_Value_0 Join_1_Value_0 0 0 Join_2_Value_0 Join_2_Value_0 0 0 Join_3_Value_0 Join_3_Value_0 1 1 Join_1_Value_1 Join_1_Value_1 1 1 Join_2_Value_1 Join_2_Value_1 1 1 Join_3_Value_1 Join_3_Value_1 2 2 Join_1_Value_2 Join_1_Value_2 0 0 0 0 Join_3_Value_0 Join_3_Value_0 @@ -693,7 +693,7 @@ SELECT '--'; -- SELECT t1.value, t2.value, t3.value FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id -LEFT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +LEFT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 Join_1_Value_2 Join_3_Value_0 @@ -701,7 +701,7 @@ SELECT 'JOIN ON with conditions'; JOIN ON with conditions SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' -LEFT JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0'; +LEFT JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' ORDER BY ALL; 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 0 0 2 Join_1_Value_2 0 0 @@ -709,7 +709,7 @@ 
SELECT '--'; -- SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' AND t2.value == 'Join_2_Value_0' -LEFT JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' AND t3.value == 'Join_3_Value_0'; +LEFT JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' AND t3.value == 'Join_3_Value_0' ORDER BY ALL; 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 0 0 2 Join_1_Value_2 0 0 @@ -718,7 +718,7 @@ JOIN multiple clauses SELECT t1.value, t2.value, t3.value FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON t1.id = t2.id OR t1.id = t2.id -LEFT JOIN test_table_join_3 AS t3 ON t2.id = t3.id OR t3.id = t2.id; +LEFT JOIN test_table_join_3 AS t3 ON t2.id = t3.id OR t3.id = t2.id ORDER BY ALL; Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 Join_1_Value_2 Join_3_Value_0 @@ -726,7 +726,7 @@ SELECT 'JOIN expression aliases'; JOIN expression aliases SELECT t1_id, t1.value, t2_id, t2.value, t3_id, t3.value FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON (t1.id AS t1_id) = (t2.id AS t2_id) -LEFT JOIN test_table_join_3 AS t3 ON t2_id = (t3.id AS t3_id); +LEFT JOIN test_table_join_3 AS t3 ON t2_id = (t3.id AS t3_id) ORDER BY ALL; 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 2 Join_1_Value_2 0 0 Join_3_Value_0 @@ -734,7 +734,7 @@ SELECT '--'; -- SELECT t1.id AS t1_id, t1.value, t2.id AS t2_id, t2.value, t3.id AS t3_id, t3.value FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON t1_id = t2_id -LEFT JOIN test_table_join_3 AS t3 ON t2_id = t3_id; +LEFT JOIN test_table_join_3 AS t3 ON t2_id = t3_id ORDER BY ALL; 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 2 Join_1_Value_2 0 0 Join_3_Value_0 @@ -744,184 +744,184 @@ SELECT 'JOIN ON without conditions'; JOIN ON without conditions SELECT test_table_join_1.id, test_table_join_1.value, test_table_join_2.id, test_table_join_2.value, test_table_join_3.id, test_table_join_3.value FROM test_table_join_1 LEFT JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id -RIGHT JOIN test_table_join_3 ON test_table_join_2.id = test_table_join_3.id; +RIGHT JOIN test_table_join_3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; +0 0 4 Join_3_Value_4 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 2 Join_1_Value_2 0 0 Join_3_Value_0 -0 0 4 Join_3_Value_4 SELECT '--'; -- SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id -RIGHT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +RIGHT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; +0 0 4 Join_3_Value_4 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 2 Join_1_Value_2 0 0 Join_3_Value_0 -0 0 4 Join_3_Value_4 SELECT '--'; -- SELECT t1.id, test_table_join_1.id, t1.value, test_table_join_1.value, t2.id, test_table_join_2.id, t2.value, test_table_join_2.value, t3.id, test_table_join_3.id, t3.value, test_table_join_3.value FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id -RIGHT JOIN 
test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +RIGHT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; +0 0 0 0 4 4 Join_3_Value_4 Join_3_Value_4 0 0 Join_1_Value_0 Join_1_Value_0 0 0 Join_2_Value_0 Join_2_Value_0 0 0 Join_3_Value_0 Join_3_Value_0 1 1 Join_1_Value_1 Join_1_Value_1 1 1 Join_2_Value_1 Join_2_Value_1 1 1 Join_3_Value_1 Join_3_Value_1 2 2 Join_1_Value_2 Join_1_Value_2 0 0 0 0 Join_3_Value_0 Join_3_Value_0 -0 0 0 0 4 4 Join_3_Value_4 Join_3_Value_4 SELECT '--'; -- SELECT t1.value, t2.value, t3.value FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id -RIGHT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +RIGHT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; + Join_3_Value_4 Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 Join_1_Value_2 Join_3_Value_0 - Join_3_Value_4 SELECT 'JOIN ON with conditions'; JOIN ON with conditions SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' -RIGHT JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0'; -0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 -0 0 4 Join_3_Value_4 +RIGHT JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' ORDER BY ALL; 0 0 1 Join_3_Value_1 +0 0 4 Join_3_Value_4 +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 SELECT '--'; -- SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' AND t2.value == 'Join_2_Value_0' -RIGHT JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' AND t3.value == 'Join_3_Value_0'; -0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +RIGHT JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' AND t3.value == 'Join_3_Value_0' ORDER BY ALL; 0 0 1 Join_3_Value_1 0 0 4 Join_3_Value_4 +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 SELECT 'JOIN multiple clauses'; JOIN multiple clauses SELECT t1.value, t2.value, t3.value FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON t1.id = t2.id OR t1.id = t2.id -RIGHT JOIN test_table_join_3 AS t3 ON t2.id = t3.id OR t3.id = t2.id; +RIGHT JOIN test_table_join_3 AS t3 ON t2.id = t3.id OR t3.id = t2.id ORDER BY ALL; + Join_3_Value_4 Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 Join_1_Value_2 Join_3_Value_0 - Join_3_Value_4 SELECT 'JOIN expression aliases'; JOIN expression aliases SELECT t1_id, t1.value, t2_id, t2.value, t3_id, t3.value FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON (t1.id AS t1_id) = (t2.id AS t2_id) -RIGHT JOIN test_table_join_3 AS t3 ON t2_id = (t3.id AS t3_id); +RIGHT JOIN test_table_join_3 AS t3 ON t2_id = (t3.id AS t3_id) ORDER BY ALL; +0 0 4 Join_3_Value_4 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 2 Join_1_Value_2 0 0 Join_3_Value_0 -0 0 4 Join_3_Value_4 SELECT '--'; -- SELECT t1.id AS t1_id, t1.value, t2.id AS t2_id, t2.value, t3.id AS t3_id, t3.value FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON t1_id = t2_id -RIGHT JOIN test_table_join_3 AS t3 ON t2_id = t3_id; +RIGHT JOIN test_table_join_3 AS t3 ON t2_id = t3_id 
ORDER BY ALL; +0 0 4 Join_3_Value_4 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 2 Join_1_Value_2 0 0 Join_3_Value_0 -0 0 4 Join_3_Value_4 SELECT 'First JOIN LEFT second JOIN FULL'; First JOIN LEFT second JOIN FULL SELECT 'JOIN ON without conditions'; JOIN ON without conditions SELECT test_table_join_1.id, test_table_join_1.value, test_table_join_2.id, test_table_join_2.value, test_table_join_3.id, test_table_join_3.value FROM test_table_join_1 LEFT JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id -FULL JOIN test_table_join_3 ON test_table_join_2.id = test_table_join_3.id; +FULL JOIN test_table_join_3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; +0 0 4 Join_3_Value_4 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 2 Join_1_Value_2 0 0 Join_3_Value_0 -0 0 4 Join_3_Value_4 SELECT '--'; -- SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id -FULL JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +FULL JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; +0 0 4 Join_3_Value_4 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 2 Join_1_Value_2 0 0 Join_3_Value_0 -0 0 4 Join_3_Value_4 SELECT '--'; -- SELECT t1.id, test_table_join_1.id, t1.value, test_table_join_1.value, t2.id, test_table_join_2.id, t2.value, test_table_join_2.value, t3.id, test_table_join_3.id, t3.value, test_table_join_3.value FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id -FULL JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +FULL JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; +0 0 0 0 4 4 Join_3_Value_4 Join_3_Value_4 0 0 Join_1_Value_0 Join_1_Value_0 0 0 Join_2_Value_0 Join_2_Value_0 0 0 Join_3_Value_0 Join_3_Value_0 1 1 Join_1_Value_1 Join_1_Value_1 1 1 Join_2_Value_1 Join_2_Value_1 1 1 Join_3_Value_1 Join_3_Value_1 2 2 Join_1_Value_2 Join_1_Value_2 0 0 0 0 Join_3_Value_0 Join_3_Value_0 -0 0 0 0 4 4 Join_3_Value_4 Join_3_Value_4 SELECT '--'; -- SELECT t1.value, t2.value, t3.value FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id -FULL JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +FULL JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; + Join_3_Value_4 Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 Join_1_Value_2 Join_3_Value_0 - Join_3_Value_4 SELECT 'JOIN ON with conditions'; JOIN ON with conditions SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' -FULL JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0'; +FULL JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' ORDER BY ALL; +0 0 1 Join_3_Value_1 +0 0 4 Join_3_Value_4 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 0 0 2 Join_1_Value_2 0 0 -0 0 4 Join_3_Value_4 -0 0 1 Join_3_Value_1 SELECT '--'; -- SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value FROM test_table_join_1 AS t1 LEFT JOIN 
test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' AND t2.value == 'Join_2_Value_0' -FULL JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' AND t3.value == 'Join_3_Value_0'; +FULL JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' AND t3.value == 'Join_3_Value_0' ORDER BY ALL; +0 0 1 Join_3_Value_1 +0 0 4 Join_3_Value_4 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 0 0 2 Join_1_Value_2 0 0 -0 0 1 Join_3_Value_1 -0 0 4 Join_3_Value_4 SELECT 'JOIN multiple clauses'; JOIN multiple clauses SELECT t1.value, t2.value, t3.value FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON t1.id = t2.id OR t1.id = t2.id -FULL JOIN test_table_join_3 AS t3 ON t2.id = t3.id OR t3.id = t2.id; +FULL JOIN test_table_join_3 AS t3 ON t2.id = t3.id OR t3.id = t2.id ORDER BY ALL; + Join_3_Value_4 Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 Join_1_Value_2 Join_3_Value_0 - Join_3_Value_4 SELECT 'JOIN expression aliases'; JOIN expression aliases SELECT t1_id, t1.value, t2_id, t2.value, t3_id, t3.value FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON (t1.id AS t1_id) = (t2.id AS t2_id) -FULL JOIN test_table_join_3 AS t3 ON t2_id = (t3.id AS t3_id); +FULL JOIN test_table_join_3 AS t3 ON t2_id = (t3.id AS t3_id) ORDER BY ALL; +0 0 4 Join_3_Value_4 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 2 Join_1_Value_2 0 0 Join_3_Value_0 -0 0 4 Join_3_Value_4 SELECT '--'; -- SELECT t1.id AS t1_id, t1.value, t2.id AS t2_id, t2.value, t3.id AS t3_id, t3.value FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON t1_id = t2_id -FULL JOIN test_table_join_3 AS t3 ON t2_id = t3_id; +FULL JOIN test_table_join_3 AS t3 ON t2_id = t3_id ORDER BY ALL; +0 0 4 Join_3_Value_4 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 2 Join_1_Value_2 0 0 Join_3_Value_0 -0 0 4 Join_3_Value_4 SELECT 'First JOIN RIGHT second JOIN INNER'; First JOIN RIGHT second JOIN INNER SELECT 'JOIN ON without conditions'; JOIN ON without conditions SELECT test_table_join_1.id, test_table_join_1.value, test_table_join_2.id, test_table_join_2.value, test_table_join_3.id, test_table_join_3.value FROM test_table_join_1 RIGHT JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id -INNER JOIN test_table_join_3 ON test_table_join_2.id = test_table_join_3.id; +INNER JOIN test_table_join_3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 SELECT '--'; -- SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id -INNER JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +INNER JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 SELECT '--'; @@ -929,48 +929,48 @@ SELECT '--'; SELECT t1.id, test_table_join_1.id, t1.value, test_table_join_1.value, t2.id, test_table_join_2.id, t2.value, test_table_join_2.value, t3.id, test_table_join_3.id, t3.value, test_table_join_3.value FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id -INNER JOIN 
test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +INNER JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; 0 0 Join_1_Value_0 Join_1_Value_0 0 0 Join_2_Value_0 Join_2_Value_0 0 0 Join_3_Value_0 Join_3_Value_0 1 1 Join_1_Value_1 Join_1_Value_1 1 1 Join_2_Value_1 Join_2_Value_1 1 1 Join_3_Value_1 Join_3_Value_1 SELECT '--'; -- SELECT t1.value, t2.value, t3.value FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id -INNER JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +INNER JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 SELECT 'JOIN ON with conditions'; JOIN ON with conditions SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' -INNER JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0'; +INNER JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' ORDER BY ALL; 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 SELECT '--'; -- SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' AND t2.value == 'Join_2_Value_0' -INNER JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' AND t3.value == 'Join_3_Value_0'; +INNER JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' AND t3.value == 'Join_3_Value_0' ORDER BY ALL; 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 SELECT 'JOIN multiple clauses'; JOIN multiple clauses SELECT t1.value, t2.value, t3.value FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON t1.id = t2.id OR t1.id = t2.id -INNER JOIN test_table_join_3 AS t3 ON t2.id = t3.id OR t3.id = t2.id; +INNER JOIN test_table_join_3 AS t3 ON t2.id = t3.id OR t3.id = t2.id ORDER BY ALL; Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 SELECT 'JOIN expression aliases'; JOIN expression aliases SELECT t1_id, t1.value, t2_id, t2.value, t3_id, t3.value FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON (t1.id AS t1_id) = (t2.id AS t2_id) -INNER JOIN test_table_join_3 AS t3 ON t2_id = (t3.id AS t3_id); +INNER JOIN test_table_join_3 AS t3 ON t2_id = (t3.id AS t3_id) ORDER BY ALL; 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 SELECT '--'; -- SELECT t1.id AS t1_id, t1.value, t2.id AS t2_id, t2.value, t3.id AS t3_id, t3.value FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON t1_id = t2_id -INNER JOIN test_table_join_3 AS t3 ON t2_id = t3_id; +INNER JOIN test_table_join_3 AS t3 ON t2_id = t3_id ORDER BY ALL; 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 SELECT 'First JOIN RIGHT second JOIN LEFT'; @@ -979,246 +979,246 @@ SELECT 'JOIN ON without conditions'; JOIN ON without conditions SELECT test_table_join_1.id, test_table_join_1.value, test_table_join_2.id, test_table_join_2.value, test_table_join_3.id, test_table_join_3.value FROM test_table_join_1 RIGHT JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id -LEFT JOIN test_table_join_3 ON test_table_join_2.id = test_table_join_3.id; 
+LEFT JOIN test_table_join_3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; +0 3 Join_2_Value_3 0 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 -0 3 Join_2_Value_3 0 SELECT '--'; -- SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id -LEFT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +LEFT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; +0 3 Join_2_Value_3 0 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 -0 3 Join_2_Value_3 0 SELECT '--'; -- SELECT t1.id, test_table_join_1.id, t1.value, test_table_join_1.value, t2.id, test_table_join_2.id, t2.value, test_table_join_2.value, t3.id, test_table_join_3.id, t3.value, test_table_join_3.value FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id -LEFT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +LEFT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; +0 0 3 3 Join_2_Value_3 Join_2_Value_3 0 0 0 0 Join_1_Value_0 Join_1_Value_0 0 0 Join_2_Value_0 Join_2_Value_0 0 0 Join_3_Value_0 Join_3_Value_0 1 1 Join_1_Value_1 Join_1_Value_1 1 1 Join_2_Value_1 Join_2_Value_1 1 1 Join_3_Value_1 Join_3_Value_1 -0 0 3 3 Join_2_Value_3 Join_2_Value_3 0 0 SELECT '--'; -- SELECT t1.value, t2.value, t3.value FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id -LEFT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +LEFT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; + Join_2_Value_3 Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 - Join_2_Value_3 SELECT 'JOIN ON with conditions'; JOIN ON with conditions SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' -LEFT JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0'; -0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 -0 3 Join_2_Value_3 0 +LEFT JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' ORDER BY ALL; 0 1 Join_2_Value_1 0 +0 3 Join_2_Value_3 0 +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 SELECT '--'; -- SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' AND t2.value == 'Join_2_Value_0' -LEFT JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' AND t3.value == 'Join_3_Value_0'; -0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +LEFT JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' AND t3.value == 'Join_3_Value_0' ORDER BY ALL; 0 1 Join_2_Value_1 0 0 3 Join_2_Value_3 0 +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 SELECT 'JOIN multiple clauses'; JOIN multiple clauses SELECT t1.value, t2.value, t3.value FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON t1.id = t2.id OR t1.id = t2.id -LEFT JOIN test_table_join_3 AS t3 ON t2.id = t3.id OR t3.id = t2.id; +LEFT JOIN test_table_join_3 AS t3 ON t2.id = t3.id OR t3.id = t2.id ORDER BY ALL; + 
Join_2_Value_3 Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 - Join_2_Value_3 SELECT 'JOIN expression aliases'; JOIN expression aliases SELECT t1_id, t1.value, t2_id, t2.value, t3_id, t3.value FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON (t1.id AS t1_id) = (t2.id AS t2_id) -LEFT JOIN test_table_join_3 AS t3 ON t2_id = (t3.id AS t3_id); +LEFT JOIN test_table_join_3 AS t3 ON t2_id = (t3.id AS t3_id) ORDER BY ALL; +0 3 Join_2_Value_3 0 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 -0 3 Join_2_Value_3 0 SELECT '--'; -- SELECT t1.id AS t1_id, t1.value, t2.id AS t2_id, t2.value, t3.id AS t3_id, t3.value FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON t1_id = t2_id -LEFT JOIN test_table_join_3 AS t3 ON t2_id = t3_id; +LEFT JOIN test_table_join_3 AS t3 ON t2_id = t3_id ORDER BY ALL; +0 3 Join_2_Value_3 0 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 -0 3 Join_2_Value_3 0 SELECT 'First JOIN RIGHT second JOIN RIGHT'; First JOIN RIGHT second JOIN RIGHT SELECT 'JOIN ON without conditions'; JOIN ON without conditions SELECT test_table_join_1.id, test_table_join_1.value, test_table_join_2.id, test_table_join_2.value, test_table_join_3.id, test_table_join_3.value FROM test_table_join_1 RIGHT JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id -RIGHT JOIN test_table_join_3 ON test_table_join_2.id = test_table_join_3.id; +RIGHT JOIN test_table_join_3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; +0 0 4 Join_3_Value_4 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 -0 0 4 Join_3_Value_4 SELECT '--'; -- SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id -RIGHT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +RIGHT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; +0 0 4 Join_3_Value_4 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 -0 0 4 Join_3_Value_4 SELECT '--'; -- SELECT t1.id, test_table_join_1.id, t1.value, test_table_join_1.value, t2.id, test_table_join_2.id, t2.value, test_table_join_2.value, t3.id, test_table_join_3.id, t3.value, test_table_join_3.value FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id -RIGHT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +RIGHT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; +0 0 0 0 4 4 Join_3_Value_4 Join_3_Value_4 0 0 Join_1_Value_0 Join_1_Value_0 0 0 Join_2_Value_0 Join_2_Value_0 0 0 Join_3_Value_0 Join_3_Value_0 1 1 Join_1_Value_1 Join_1_Value_1 1 1 Join_2_Value_1 Join_2_Value_1 1 1 Join_3_Value_1 Join_3_Value_1 -0 0 0 0 4 4 Join_3_Value_4 Join_3_Value_4 SELECT '--'; -- SELECT t1.value, t2.value, t3.value FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id -RIGHT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +RIGHT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; + Join_3_Value_4 Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 - Join_3_Value_4 
SELECT 'JOIN ON with conditions'; JOIN ON with conditions SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' -RIGHT JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0'; -0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 -0 0 4 Join_3_Value_4 +RIGHT JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' ORDER BY ALL; 0 0 1 Join_3_Value_1 +0 0 4 Join_3_Value_4 +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 SELECT '--'; -- SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' AND t2.value == 'Join_2_Value_0' -RIGHT JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' AND t3.value == 'Join_3_Value_0'; -0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +RIGHT JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' AND t3.value == 'Join_3_Value_0' ORDER BY ALL; 0 0 1 Join_3_Value_1 0 0 4 Join_3_Value_4 +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 SELECT 'JOIN multiple clauses'; JOIN multiple clauses SELECT t1.value, t2.value, t3.value FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON t1.id = t2.id OR t1.id = t2.id -RIGHT JOIN test_table_join_3 AS t3 ON t2.id = t3.id OR t3.id = t2.id; +RIGHT JOIN test_table_join_3 AS t3 ON t2.id = t3.id OR t3.id = t2.id ORDER BY ALL; + Join_3_Value_4 Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 - Join_3_Value_4 SELECT 'JOIN expression aliases'; JOIN expression aliases SELECT t1_id, t1.value, t2_id, t2.value, t3_id, t3.value FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON (t1.id AS t1_id) = (t2.id AS t2_id) -RIGHT JOIN test_table_join_3 AS t3 ON t2_id = (t3.id AS t3_id); +RIGHT JOIN test_table_join_3 AS t3 ON t2_id = (t3.id AS t3_id) ORDER BY ALL; +0 0 4 Join_3_Value_4 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 -0 0 4 Join_3_Value_4 SELECT '--'; -- SELECT t1.id AS t1_id, t1.value, t2.id AS t2_id, t2.value, t3.id AS t3_id, t3.value FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON t1_id = t2_id -RIGHT JOIN test_table_join_3 AS t3 ON t2_id = t3_id; +RIGHT JOIN test_table_join_3 AS t3 ON t2_id = t3_id ORDER BY ALL; +0 0 4 Join_3_Value_4 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 -0 0 4 Join_3_Value_4 SELECT 'First JOIN RIGHT second JOIN FULL'; First JOIN RIGHT second JOIN FULL SELECT 'JOIN ON without conditions'; JOIN ON without conditions SELECT test_table_join_1.id, test_table_join_1.value, test_table_join_2.id, test_table_join_2.value, test_table_join_3.id, test_table_join_3.value FROM test_table_join_1 RIGHT JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id -FULL JOIN test_table_join_3 ON test_table_join_2.id = test_table_join_3.id; +FULL JOIN test_table_join_3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; +0 0 4 Join_3_Value_4 +0 3 Join_2_Value_3 0 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 -0 3 Join_2_Value_3 0 -0 0 4 Join_3_Value_4 SELECT '--'; -- SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id 
-FULL JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +FULL JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; +0 0 4 Join_3_Value_4 +0 3 Join_2_Value_3 0 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 -0 3 Join_2_Value_3 0 -0 0 4 Join_3_Value_4 SELECT '--'; -- SELECT t1.id, test_table_join_1.id, t1.value, test_table_join_1.value, t2.id, test_table_join_2.id, t2.value, test_table_join_2.value, t3.id, test_table_join_3.id, t3.value, test_table_join_3.value FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id -FULL JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +FULL JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; +0 0 0 0 4 4 Join_3_Value_4 Join_3_Value_4 +0 0 3 3 Join_2_Value_3 Join_2_Value_3 0 0 0 0 Join_1_Value_0 Join_1_Value_0 0 0 Join_2_Value_0 Join_2_Value_0 0 0 Join_3_Value_0 Join_3_Value_0 1 1 Join_1_Value_1 Join_1_Value_1 1 1 Join_2_Value_1 Join_2_Value_1 1 1 Join_3_Value_1 Join_3_Value_1 -0 0 3 3 Join_2_Value_3 Join_2_Value_3 0 0 -0 0 0 0 4 4 Join_3_Value_4 Join_3_Value_4 SELECT '--'; -- SELECT t1.value, t2.value, t3.value FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id -FULL JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +FULL JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; + Join_3_Value_4 + Join_2_Value_3 Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 - Join_2_Value_3 - Join_3_Value_4 SELECT 'JOIN ON with conditions'; JOIN ON with conditions SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' -FULL JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0'; -0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 -0 3 Join_2_Value_3 0 -0 1 Join_2_Value_1 0 -0 0 4 Join_3_Value_4 +FULL JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' ORDER BY ALL; 0 0 1 Join_3_Value_1 +0 0 4 Join_3_Value_4 +0 1 Join_2_Value_1 0 +0 3 Join_2_Value_3 0 +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 SELECT '--'; -- SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' AND t2.value == 'Join_2_Value_0' -FULL JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' AND t3.value == 'Join_3_Value_0'; -0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 -0 1 Join_2_Value_1 0 -0 3 Join_2_Value_3 0 +FULL JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' AND t3.value == 'Join_3_Value_0' ORDER BY ALL; 0 0 1 Join_3_Value_1 0 0 4 Join_3_Value_4 +0 1 Join_2_Value_1 0 +0 3 Join_2_Value_3 0 +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 SELECT 'JOIN multiple clauses'; JOIN multiple clauses SELECT t1.value, t2.value, t3.value FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON t1.id = t2.id OR t1.id = t2.id -FULL JOIN test_table_join_3 AS t3 ON t2.id = t3.id OR t3.id = t2.id; +FULL JOIN test_table_join_3 AS t3 ON t2.id = t3.id OR t3.id = t2.id ORDER BY ALL; + Join_3_Value_4 + Join_2_Value_3 Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 
Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 - Join_2_Value_3 - Join_3_Value_4 SELECT 'JOIN expression aliases'; JOIN expression aliases SELECT t1_id, t1.value, t2_id, t2.value, t3_id, t3.value FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON (t1.id AS t1_id) = (t2.id AS t2_id) -FULL JOIN test_table_join_3 AS t3 ON t2_id = (t3.id AS t3_id); +FULL JOIN test_table_join_3 AS t3 ON t2_id = (t3.id AS t3_id) ORDER BY ALL; +0 0 4 Join_3_Value_4 +0 3 Join_2_Value_3 0 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 -0 3 Join_2_Value_3 0 -0 0 4 Join_3_Value_4 SELECT '--'; -- SELECT t1.id AS t1_id, t1.value, t2.id AS t2_id, t2.value, t3.id AS t3_id, t3.value FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON t1_id = t2_id -FULL JOIN test_table_join_3 AS t3 ON t2_id = t3_id; +FULL JOIN test_table_join_3 AS t3 ON t2_id = t3_id ORDER BY ALL; +0 0 4 Join_3_Value_4 +0 3 Join_2_Value_3 0 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 -0 3 Join_2_Value_3 0 -0 0 4 Join_3_Value_4 SELECT 'First JOIN FULL second JOIN INNER'; First JOIN FULL second JOIN INNER SELECT 'JOIN ON without conditions'; JOIN ON without conditions SELECT test_table_join_1.id, test_table_join_1.value, test_table_join_2.id, test_table_join_2.value, test_table_join_3.id, test_table_join_3.value FROM test_table_join_1 FULL JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id -INNER JOIN test_table_join_3 ON test_table_join_2.id = test_table_join_3.id; +INNER JOIN test_table_join_3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 2 Join_1_Value_2 0 0 Join_3_Value_0 @@ -1226,7 +1226,7 @@ SELECT '--'; -- SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id -INNER JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +INNER JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 2 Join_1_Value_2 0 0 Join_3_Value_0 @@ -1235,7 +1235,7 @@ SELECT '--'; SELECT t1.id, test_table_join_1.id, t1.value, test_table_join_1.value, t2.id, test_table_join_2.id, t2.value, test_table_join_2.value, t3.id, test_table_join_3.id, t3.value, test_table_join_3.value FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id -INNER JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +INNER JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; 0 0 Join_1_Value_0 Join_1_Value_0 0 0 Join_2_Value_0 Join_2_Value_0 0 0 Join_3_Value_0 Join_3_Value_0 1 1 Join_1_Value_1 Join_1_Value_1 1 1 Join_2_Value_1 Join_2_Value_1 1 1 Join_3_Value_1 Join_3_Value_1 2 2 Join_1_Value_2 Join_1_Value_2 0 0 0 0 Join_3_Value_0 Join_3_Value_0 @@ -1243,7 +1243,7 @@ SELECT '--'; -- SELECT t1.value, t2.value, t3.value FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id -INNER JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +INNER JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 
Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 Join_1_Value_2 Join_3_Value_0 @@ -1251,20 +1251,20 @@ SELECT 'JOIN ON with conditions'; JOIN ON with conditions SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' -INNER JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0'; +INNER JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' ORDER BY ALL; 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 SELECT '--'; -- SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' AND t2.value == 'Join_2_Value_0' -INNER JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' AND t3.value == 'Join_3_Value_0'; +INNER JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' AND t3.value == 'Join_3_Value_0' ORDER BY ALL; 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 SELECT 'JOIN multiple clauses'; JOIN multiple clauses SELECT t1.value, t2.value, t3.value FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON t1.id = t2.id OR t1.id = t2.id -INNER JOIN test_table_join_3 AS t3 ON t2.id = t3.id OR t3.id = t2.id; +INNER JOIN test_table_join_3 AS t3 ON t2.id = t3.id OR t3.id = t2.id ORDER BY ALL; Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 Join_1_Value_2 Join_3_Value_0 @@ -1272,7 +1272,7 @@ SELECT 'JOIN expression aliases'; JOIN expression aliases SELECT t1_id, t1.value, t2_id, t2.value, t3_id, t3.value FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON (t1.id AS t1_id) = (t2.id AS t2_id) -INNER JOIN test_table_join_3 AS t3 ON t2_id = (t3.id AS t3_id); +INNER JOIN test_table_join_3 AS t3 ON t2_id = (t3.id AS t3_id) ORDER BY ALL; 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 2 Join_1_Value_2 0 0 Join_3_Value_0 @@ -1280,7 +1280,7 @@ SELECT '--'; -- SELECT t1.id AS t1_id, t1.value, t2.id AS t2_id, t2.value, t3.id AS t3_id, t3.value FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON t1_id = t2_id -INNER JOIN test_table_join_3 AS t3 ON t2_id = t3_id; +INNER JOIN test_table_join_3 AS t3 ON t2_id = t3_id ORDER BY ALL; 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 2 Join_1_Value_2 0 0 Join_3_Value_0 @@ -1290,265 +1290,265 @@ SELECT 'JOIN ON without conditions'; JOIN ON without conditions SELECT test_table_join_1.id, test_table_join_1.value, test_table_join_2.id, test_table_join_2.value, test_table_join_3.id, test_table_join_3.value FROM test_table_join_1 FULL JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id -LEFT JOIN test_table_join_3 ON test_table_join_2.id = test_table_join_3.id; +LEFT JOIN test_table_join_3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; +0 3 Join_2_Value_3 0 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 2 Join_1_Value_2 0 0 Join_3_Value_0 -0 3 Join_2_Value_3 0 SELECT '--'; -- SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id -LEFT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +LEFT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = 
test_table_join_3.id ORDER BY ALL; +0 3 Join_2_Value_3 0 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 2 Join_1_Value_2 0 0 Join_3_Value_0 -0 3 Join_2_Value_3 0 SELECT '--'; -- SELECT t1.id, test_table_join_1.id, t1.value, test_table_join_1.value, t2.id, test_table_join_2.id, t2.value, test_table_join_2.value, t3.id, test_table_join_3.id, t3.value, test_table_join_3.value FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id -LEFT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +LEFT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; +0 0 3 3 Join_2_Value_3 Join_2_Value_3 0 0 0 0 Join_1_Value_0 Join_1_Value_0 0 0 Join_2_Value_0 Join_2_Value_0 0 0 Join_3_Value_0 Join_3_Value_0 1 1 Join_1_Value_1 Join_1_Value_1 1 1 Join_2_Value_1 Join_2_Value_1 1 1 Join_3_Value_1 Join_3_Value_1 2 2 Join_1_Value_2 Join_1_Value_2 0 0 0 0 Join_3_Value_0 Join_3_Value_0 -0 0 3 3 Join_2_Value_3 Join_2_Value_3 0 0 SELECT '--'; -- SELECT t1.value, t2.value, t3.value FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id -LEFT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +LEFT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; + Join_2_Value_3 Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 Join_1_Value_2 Join_3_Value_0 - Join_2_Value_3 SELECT 'JOIN ON with conditions'; JOIN ON with conditions SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' -LEFT JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0'; +LEFT JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' ORDER BY ALL; +0 1 Join_2_Value_1 0 +0 3 Join_2_Value_3 0 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 0 0 2 Join_1_Value_2 0 0 -0 3 Join_2_Value_3 0 -0 1 Join_2_Value_1 0 SELECT '--'; -- SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' AND t2.value == 'Join_2_Value_0' -LEFT JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' AND t3.value == 'Join_3_Value_0'; +LEFT JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' AND t3.value == 'Join_3_Value_0' ORDER BY ALL; +0 1 Join_2_Value_1 0 +0 3 Join_2_Value_3 0 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 0 0 2 Join_1_Value_2 0 0 -0 1 Join_2_Value_1 0 -0 3 Join_2_Value_3 0 SELECT 'JOIN multiple clauses'; JOIN multiple clauses SELECT t1.value, t2.value, t3.value FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON t1.id = t2.id OR t1.id = t2.id -LEFT JOIN test_table_join_3 AS t3 ON t2.id = t3.id OR t3.id = t2.id; +LEFT JOIN test_table_join_3 AS t3 ON t2.id = t3.id OR t3.id = t2.id ORDER BY ALL; + Join_2_Value_3 Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 Join_1_Value_2 Join_3_Value_0 - Join_2_Value_3 SELECT 'JOIN expression aliases'; JOIN expression aliases SELECT t1_id, t1.value, t2_id, t2.value, t3_id, t3.value FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON (t1.id AS t1_id) = (t2.id AS t2_id) -LEFT JOIN 
test_table_join_3 AS t3 ON t2_id = (t3.id AS t3_id); +LEFT JOIN test_table_join_3 AS t3 ON t2_id = (t3.id AS t3_id) ORDER BY ALL; +0 3 Join_2_Value_3 0 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 2 Join_1_Value_2 0 0 Join_3_Value_0 -0 3 Join_2_Value_3 0 SELECT '--'; -- SELECT t1.id AS t1_id, t1.value, t2.id AS t2_id, t2.value, t3.id AS t3_id, t3.value FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON t1_id = t2_id -LEFT JOIN test_table_join_3 AS t3 ON t2_id = t3_id; +LEFT JOIN test_table_join_3 AS t3 ON t2_id = t3_id ORDER BY ALL; +0 3 Join_2_Value_3 0 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 2 Join_1_Value_2 0 0 Join_3_Value_0 -0 3 Join_2_Value_3 0 SELECT 'First JOIN FULL second JOIN RIGHT'; First JOIN FULL second JOIN RIGHT SELECT 'JOIN ON without conditions'; JOIN ON without conditions SELECT test_table_join_1.id, test_table_join_1.value, test_table_join_2.id, test_table_join_2.value, test_table_join_3.id, test_table_join_3.value FROM test_table_join_1 FULL JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id -RIGHT JOIN test_table_join_3 ON test_table_join_2.id = test_table_join_3.id; +RIGHT JOIN test_table_join_3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; +0 0 4 Join_3_Value_4 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 2 Join_1_Value_2 0 0 Join_3_Value_0 -0 0 4 Join_3_Value_4 SELECT '--'; -- SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id -RIGHT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +RIGHT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; +0 0 4 Join_3_Value_4 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 2 Join_1_Value_2 0 0 Join_3_Value_0 -0 0 4 Join_3_Value_4 SELECT '--'; -- SELECT t1.id, test_table_join_1.id, t1.value, test_table_join_1.value, t2.id, test_table_join_2.id, t2.value, test_table_join_2.value, t3.id, test_table_join_3.id, t3.value, test_table_join_3.value FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id -RIGHT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +RIGHT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; +0 0 0 0 4 4 Join_3_Value_4 Join_3_Value_4 0 0 Join_1_Value_0 Join_1_Value_0 0 0 Join_2_Value_0 Join_2_Value_0 0 0 Join_3_Value_0 Join_3_Value_0 1 1 Join_1_Value_1 Join_1_Value_1 1 1 Join_2_Value_1 Join_2_Value_1 1 1 Join_3_Value_1 Join_3_Value_1 2 2 Join_1_Value_2 Join_1_Value_2 0 0 0 0 Join_3_Value_0 Join_3_Value_0 -0 0 0 0 4 4 Join_3_Value_4 Join_3_Value_4 SELECT '--'; -- SELECT t1.value, t2.value, t3.value FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id -RIGHT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +RIGHT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; + Join_3_Value_4 Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 Join_1_Value_2 Join_3_Value_0 - Join_3_Value_4 SELECT 'JOIN ON with conditions'; JOIN ON with conditions SELECT t1.id, t1.value, t2.id, t2.value, t3.id, 
t3.value FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' -RIGHT JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0'; -0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 -0 0 4 Join_3_Value_4 +RIGHT JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' ORDER BY ALL; 0 0 1 Join_3_Value_1 +0 0 4 Join_3_Value_4 +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 SELECT '--'; -- SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' AND t2.value == 'Join_2_Value_0' -RIGHT JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' AND t3.value == 'Join_3_Value_0'; -0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +RIGHT JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' AND t3.value == 'Join_3_Value_0' ORDER BY ALL; 0 0 1 Join_3_Value_1 0 0 4 Join_3_Value_4 +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 SELECT 'JOIN multiple clauses'; JOIN multiple clauses SELECT t1.value, t2.value, t3.value FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON t1.id = t2.id OR t1.id = t2.id -RIGHT JOIN test_table_join_3 AS t3 ON t2.id = t3.id OR t3.id = t2.id; +RIGHT JOIN test_table_join_3 AS t3 ON t2.id = t3.id OR t3.id = t2.id ORDER BY ALL; + Join_3_Value_4 Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 Join_1_Value_2 Join_3_Value_0 - Join_3_Value_4 SELECT 'JOIN expression aliases'; JOIN expression aliases SELECT t1_id, t1.value, t2_id, t2.value, t3_id, t3.value FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON (t1.id AS t1_id) = (t2.id AS t2_id) -RIGHT JOIN test_table_join_3 AS t3 ON t2_id = (t3.id AS t3_id); +RIGHT JOIN test_table_join_3 AS t3 ON t2_id = (t3.id AS t3_id) ORDER BY ALL; +0 0 4 Join_3_Value_4 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 2 Join_1_Value_2 0 0 Join_3_Value_0 -0 0 4 Join_3_Value_4 SELECT '--'; -- SELECT t1.id AS t1_id, t1.value, t2.id AS t2_id, t2.value, t3.id AS t3_id, t3.value FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON t1_id = t2_id -RIGHT JOIN test_table_join_3 AS t3 ON t2_id = t3_id; +RIGHT JOIN test_table_join_3 AS t3 ON t2_id = t3_id ORDER BY ALL; +0 0 4 Join_3_Value_4 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 2 Join_1_Value_2 0 0 Join_3_Value_0 -0 0 4 Join_3_Value_4 SELECT 'First JOIN FULL second JOIN FULL'; First JOIN FULL second JOIN FULL SELECT 'JOIN ON without conditions'; JOIN ON without conditions SELECT test_table_join_1.id, test_table_join_1.value, test_table_join_2.id, test_table_join_2.value, test_table_join_3.id, test_table_join_3.value FROM test_table_join_1 FULL JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id -FULL JOIN test_table_join_3 ON test_table_join_2.id = test_table_join_3.id; +FULL JOIN test_table_join_3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; +0 0 4 Join_3_Value_4 +0 3 Join_2_Value_3 0 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 2 Join_1_Value_2 0 0 Join_3_Value_0 -0 3 Join_2_Value_3 0 -0 0 4 Join_3_Value_4 SELECT '--'; -- SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON test_table_join_1.id 
= test_table_join_2.id -FULL JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +FULL JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; +0 0 4 Join_3_Value_4 +0 3 Join_2_Value_3 0 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 2 Join_1_Value_2 0 0 Join_3_Value_0 -0 3 Join_2_Value_3 0 -0 0 4 Join_3_Value_4 SELECT '--'; -- SELECT t1.id, test_table_join_1.id, t1.value, test_table_join_1.value, t2.id, test_table_join_2.id, t2.value, test_table_join_2.value, t3.id, test_table_join_3.id, t3.value, test_table_join_3.value FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id -FULL JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +FULL JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; +0 0 0 0 4 4 Join_3_Value_4 Join_3_Value_4 +0 0 3 3 Join_2_Value_3 Join_2_Value_3 0 0 0 0 Join_1_Value_0 Join_1_Value_0 0 0 Join_2_Value_0 Join_2_Value_0 0 0 Join_3_Value_0 Join_3_Value_0 1 1 Join_1_Value_1 Join_1_Value_1 1 1 Join_2_Value_1 Join_2_Value_1 1 1 Join_3_Value_1 Join_3_Value_1 2 2 Join_1_Value_2 Join_1_Value_2 0 0 0 0 Join_3_Value_0 Join_3_Value_0 -0 0 3 3 Join_2_Value_3 Join_2_Value_3 0 0 -0 0 0 0 4 4 Join_3_Value_4 Join_3_Value_4 SELECT '--'; -- SELECT t1.value, t2.value, t3.value FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id -FULL JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +FULL JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; + Join_3_Value_4 + Join_2_Value_3 Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 Join_1_Value_2 Join_3_Value_0 - Join_2_Value_3 - Join_3_Value_4 SELECT 'JOIN ON with conditions'; JOIN ON with conditions SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' -FULL JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0'; +FULL JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' ORDER BY ALL; +0 0 1 Join_3_Value_1 +0 0 4 Join_3_Value_4 +0 1 Join_2_Value_1 0 +0 3 Join_2_Value_3 0 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 0 0 2 Join_1_Value_2 0 0 -0 3 Join_2_Value_3 0 -0 1 Join_2_Value_1 0 -0 0 4 Join_3_Value_4 -0 0 1 Join_3_Value_1 SELECT '--'; -- SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' AND t2.value == 'Join_2_Value_0' -FULL JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' AND t3.value == 'Join_3_Value_0'; +FULL JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' AND t3.value == 'Join_3_Value_0' ORDER BY ALL; +0 0 1 Join_3_Value_1 +0 0 4 Join_3_Value_4 +0 1 Join_2_Value_1 0 +0 3 Join_2_Value_3 0 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 0 0 2 Join_1_Value_2 0 0 -0 1 Join_2_Value_1 0 -0 3 Join_2_Value_3 0 -0 0 1 Join_3_Value_1 -0 0 4 Join_3_Value_4 SELECT 'JOIN multiple clauses'; JOIN multiple clauses SELECT t1.value, t2.value, t3.value FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON t1.id = t2.id OR t1.id = t2.id -FULL JOIN test_table_join_3 AS t3 ON 
t2.id = t3.id OR t3.id = t2.id; +FULL JOIN test_table_join_3 AS t3 ON t2.id = t3.id OR t3.id = t2.id ORDER BY ALL; + Join_3_Value_4 + Join_2_Value_3 Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 Join_1_Value_2 Join_3_Value_0 - Join_2_Value_3 - Join_3_Value_4 SELECT 'JOIN expression aliases'; JOIN expression aliases SELECT t1_id, t1.value, t2_id, t2.value, t3_id, t3.value FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON (t1.id AS t1_id) = (t2.id AS t2_id) -FULL JOIN test_table_join_3 AS t3 ON t2_id = (t3.id AS t3_id); +FULL JOIN test_table_join_3 AS t3 ON t2_id = (t3.id AS t3_id) ORDER BY ALL; +0 0 4 Join_3_Value_4 +0 3 Join_2_Value_3 0 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 2 Join_1_Value_2 0 0 Join_3_Value_0 -0 3 Join_2_Value_3 0 -0 0 4 Join_3_Value_4 SELECT '--'; -- SELECT t1.id AS t1_id, t1.value, t2.id AS t2_id, t2.value, t3.id AS t3_id, t3.value FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON t1_id = t2_id -FULL JOIN test_table_join_3 AS t3 ON t2_id = t3_id; +FULL JOIN test_table_join_3 AS t3 ON t2_id = t3_id ORDER BY ALL; +0 0 4 Join_3_Value_4 +0 3 Join_2_Value_3 0 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 2 Join_1_Value_2 0 0 Join_3_Value_0 -0 3 Join_2_Value_3 0 -0 0 4 Join_3_Value_4 diff --git a/tests/queries/0_stateless/02372_analyzer_join.sql.j2 b/tests/queries/0_stateless/02372_analyzer_join.sql.j2 index facf4dc018b..45ae63b9a49 100644 --- a/tests/queries/0_stateless/02372_analyzer_join.sql.j2 +++ b/tests/queries/0_stateless/02372_analyzer_join.sql.j2 @@ -45,59 +45,59 @@ SELECT 'JOIN {{ join_type }}'; SELECT 'JOIN ON without conditions'; SELECT test_table_join_1.id, test_table_join_1.value, test_table_join_2.id, test_table_join_2.value -FROM test_table_join_1 {{ join_type }} JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id; +FROM test_table_join_1 {{ join_type }} JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id ORDER BY ALL; SELECT '--'; SELECT t1.id, t1.value, t2.id, t2.value -FROM test_table_join_1 AS t1 {{ join_type }} JOIN test_table_join_2 AS t2 ON t1.id = t2.id; +FROM test_table_join_1 AS t1 {{ join_type }} JOIN test_table_join_2 AS t2 ON t1.id = t2.id ORDER BY ALL; SELECT '--'; SELECT t1.id, test_table_join_1.id, t1.value, test_table_join_1.value, t2.id, test_table_join_2.id, t2.value, test_table_join_2.value -FROM test_table_join_1 AS t1 {{ join_type }} JOIN test_table_join_2 AS t2 ON t1.id = t2.id; +FROM test_table_join_1 AS t1 {{ join_type }} JOIN test_table_join_2 AS t2 ON t1.id = t2.id ORDER BY ALL; SELECT '--'; SELECT t1.value, t2.value -FROM test_table_join_1 AS t1 {{ join_type }} JOIN test_table_join_2 AS t2 ON t1.id = t2.id; +FROM test_table_join_1 AS t1 {{ join_type }} JOIN test_table_join_2 AS t2 ON t1.id = t2.id ORDER BY ALL; -SELECT id FROM test_table_join_1 {{ join_type }} JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id; -- { serverError AMBIGUOUS_IDENTIFIER } +SELECT id FROM test_table_join_1 {{ join_type }} JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id ORDER BY ALL; -- { serverError AMBIGUOUS_IDENTIFIER } -SELECT value FROM test_table_join_1 {{ join_type }} JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id; -- { serverError AMBIGUOUS_IDENTIFIER } +SELECT value FROM test_table_join_1 {{ join_type }} JOIN test_table_join_2 ON test_table_join_1.id = 
test_table_join_2.id ORDER BY ALL; -- { serverError AMBIGUOUS_IDENTIFIER } SELECT 'JOIN ON with conditions'; SELECT t1.id, t1.value, t2.id, t2.value -FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value = 'Join_1_Value_0'; +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value = 'Join_1_Value_0' ORDER BY ALL; SELECT '--'; SELECT t1.id, t1.value, t2.id, t2.value -FROM test_table_join_1 AS t1 {{ join_type }} JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t2.value = 'Join_2_Value_0'; +FROM test_table_join_1 AS t1 {{ join_type }} JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t2.value = 'Join_2_Value_0' ORDER BY ALL; SELECT '--'; SELECT t1.id, t1.value, t2.id, t2.value -FROM test_table_join_1 AS t1 {{ join_type }} JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value = 'Join_1_Value_0' AND t2.value = 'Join_2_Value_0'; +FROM test_table_join_1 AS t1 {{ join_type }} JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value = 'Join_1_Value_0' AND t2.value = 'Join_2_Value_0' ORDER BY ALL; SELECT '--'; SELECT t1.id, t1.value, t2.id, t2.value -FROM test_table_join_1 AS t1 {{ join_type }} JOIN test_table_join_2 AS t2 ON toString(t1.id) = toString(t2.id) AND t1.value = 'Join_1_Value_0' AND t2.value = 'Join_2_Value_0'; +FROM test_table_join_1 AS t1 {{ join_type }} JOIN test_table_join_2 AS t2 ON toString(t1.id) = toString(t2.id) AND t1.value = 'Join_1_Value_0' AND t2.value = 'Join_2_Value_0' ORDER BY ALL; SELECT 'JOIN multiple clauses'; SELECT t1.id, t1.value, t2.id, t2.value -FROM test_table_join_1 AS t1 {{ join_type }} JOIN test_table_join_2 AS t2 ON t1.id = t2.id OR t1.id = t2.id; +FROM test_table_join_1 AS t1 {{ join_type }} JOIN test_table_join_2 AS t2 ON t1.id = t2.id OR t1.id = t2.id ORDER BY ALL; SELECT 'JOIN expression aliases'; -SELECT t1_id, t1.value, t2_id, t2.value FROM test_table_join_1 AS t1 {{ join_type }} JOIN test_table_join_2 AS t2 ON (t1.id AS t1_id) = (t2.id AS t2_id); +SELECT t1_id, t1.value, t2_id, t2.value FROM test_table_join_1 AS t1 {{ join_type }} JOIN test_table_join_2 AS t2 ON (t1.id AS t1_id) = (t2.id AS t2_id) ORDER BY ALL; SELECT '--'; -SELECT t1.id AS t1_id, t1.value, t2.id AS t2_id, t2.value FROM test_table_join_1 AS t1 {{ join_type }} JOIN test_table_join_2 AS t2 ON t1_id = t2_id; +SELECT t1.id AS t1_id, t1.value, t2.id AS t2_id, t2.value FROM test_table_join_1 AS t1 {{ join_type }} JOIN test_table_join_2 AS t2 ON t1_id = t2_id ORDER BY ALL; {% endfor %} @@ -110,56 +110,56 @@ SELECT 'JOIN ON without conditions'; SELECT test_table_join_1.id, test_table_join_1.value, test_table_join_2.id, test_table_join_2.value, test_table_join_3.id, test_table_join_3.value FROM test_table_join_1 {{ first_join_type }} JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id -{{ second_join_type }} JOIN test_table_join_3 ON test_table_join_2.id = test_table_join_3.id; +{{ second_join_type }} JOIN test_table_join_3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; SELECT '--'; SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value FROM test_table_join_1 AS t1 {{ first_join_type }} JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id -{{ second_join_type }} JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +{{ second_join_type }} JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; SELECT '--'; SELECT t1.id, test_table_join_1.id, t1.value, test_table_join_1.value, t2.id, 
test_table_join_2.id, t2.value, test_table_join_2.value, t3.id, test_table_join_3.id, t3.value, test_table_join_3.value FROM test_table_join_1 AS t1 {{ first_join_type }} JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id -{{ second_join_type }} JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +{{ second_join_type }} JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; SELECT '--'; SELECT t1.value, t2.value, t3.value FROM test_table_join_1 AS t1 {{ first_join_type }} JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id -{{ second_join_type }} JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +{{ second_join_type }} JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; SELECT 'JOIN ON with conditions'; SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value FROM test_table_join_1 AS t1 {{ first_join_type }} JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' -{{ second_join_type }} JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0'; +{{ second_join_type }} JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' ORDER BY ALL; SELECT '--'; SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value FROM test_table_join_1 AS t1 {{ first_join_type }} JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' AND t2.value == 'Join_2_Value_0' -{{ second_join_type }} JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' AND t3.value == 'Join_3_Value_0'; +{{ second_join_type }} JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' AND t3.value == 'Join_3_Value_0' ORDER BY ALL; SELECT 'JOIN multiple clauses'; SELECT t1.value, t2.value, t3.value FROM test_table_join_1 AS t1 {{ first_join_type }} JOIN test_table_join_2 AS t2 ON t1.id = t2.id OR t1.id = t2.id -{{ second_join_type }} JOIN test_table_join_3 AS t3 ON t2.id = t3.id OR t3.id = t2.id; +{{ second_join_type }} JOIN test_table_join_3 AS t3 ON t2.id = t3.id OR t3.id = t2.id ORDER BY ALL; SELECT 'JOIN expression aliases'; SELECT t1_id, t1.value, t2_id, t2.value, t3_id, t3.value FROM test_table_join_1 AS t1 {{ first_join_type }} JOIN test_table_join_2 AS t2 ON (t1.id AS t1_id) = (t2.id AS t2_id) -{{ second_join_type }} JOIN test_table_join_3 AS t3 ON t2_id = (t3.id AS t3_id); +{{ second_join_type }} JOIN test_table_join_3 AS t3 ON t2_id = (t3.id AS t3_id) ORDER BY ALL; SELECT '--'; SELECT t1.id AS t1_id, t1.value, t2.id AS t2_id, t2.value, t3.id AS t3_id, t3.value FROM test_table_join_1 AS t1 {{ first_join_type }} JOIN test_table_join_2 AS t2 ON t1_id = t2_id -{{ second_join_type }} JOIN test_table_join_3 AS t3 ON t2_id = t3_id; +{{ second_join_type }} JOIN test_table_join_3 AS t3 ON t2_id = t3_id ORDER BY ALL; {% endfor %} {% endfor %} diff --git a/tests/queries/0_stateless/02373_analyzer_join_use_nulls.reference b/tests/queries/0_stateless/02373_analyzer_join_use_nulls.reference index 3722c23e4a0..2b3671e1ea6 100644 --- a/tests/queries/0_stateless/02373_analyzer_join_use_nulls.reference +++ b/tests/queries/0_stateless/02373_analyzer_join_use_nulls.reference @@ -1,27 +1,27 @@ -- { echoOn } SELECT t1.id AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), t2.id AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value) -FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = 
t2.id; +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id ORDER BY ALL; 0 UInt64 Join_1_Value_0 String 0 UInt64 Join_2_Value_0 String 1 UInt64 Join_1_Value_1 String 1 UInt64 Join_2_Value_1 String SELECT '--'; -- SELECT t1.id AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), t2.id AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value) -FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON t1.id = t2.id; +FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON t1.id = t2.id ORDER BY ALL; 0 UInt64 Join_1_Value_0 String 0 Nullable(UInt64) Join_2_Value_0 Nullable(String) 1 UInt64 Join_1_Value_1 String 1 Nullable(UInt64) Join_2_Value_1 Nullable(String) 2 UInt64 Join_1_Value_2 String \N Nullable(UInt64) \N Nullable(String) SELECT '--'; -- SELECT t1.id AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), t2.id AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value) -FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON t1.id = t2.id; +FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON t1.id = t2.id ORDER BY ALL; 0 Nullable(UInt64) Join_1_Value_0 Nullable(String) 0 UInt64 Join_2_Value_0 String 1 Nullable(UInt64) Join_1_Value_1 Nullable(String) 1 UInt64 Join_2_Value_1 String \N Nullable(UInt64) \N Nullable(String) 3 UInt64 Join_2_Value_3 String SELECT '--'; -- SELECT t1.id AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), t2.id AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value) -FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON t1.id = t2.id; +FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON t1.id = t2.id ORDER BY ALL; 0 Nullable(UInt64) Join_1_Value_0 Nullable(String) 0 Nullable(UInt64) Join_2_Value_0 Nullable(String) 1 Nullable(UInt64) Join_1_Value_1 Nullable(String) 1 Nullable(UInt64) Join_2_Value_1 Nullable(String) 2 Nullable(UInt64) Join_1_Value_2 Nullable(String) \N Nullable(UInt64) \N Nullable(String) @@ -30,14 +30,14 @@ SELECT '--'; -- SELECT id AS using_id, toTypeName(using_id), t1.id AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), t2.id AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value) -FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 USING (id); +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 USING (id) ORDER BY ALL; 0 UInt64 0 UInt64 Join_1_Value_0 String 0 UInt64 Join_2_Value_0 String 1 UInt64 1 UInt64 Join_1_Value_1 String 1 UInt64 Join_2_Value_1 String SELECT '--'; -- SELECT id AS using_id, toTypeName(using_id), t1.id AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), t2.id AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value) -FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 USING (id); +FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 USING (id) ORDER BY ALL; 0 UInt64 0 UInt64 Join_1_Value_0 String 0 Nullable(UInt64) Join_2_Value_0 Nullable(String) 1 UInt64 1 UInt64 Join_1_Value_1 String 1 Nullable(UInt64) Join_2_Value_1 Nullable(String) 2 UInt64 2 UInt64 Join_1_Value_2 String \N Nullable(UInt64) \N Nullable(String) @@ -45,7 +45,7 @@ SELECT '--'; -- SELECT id AS using_id, toTypeName(using_id), t1.id AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), t2.id AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value) -FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS 
t2 USING (id); +FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 USING (id) ORDER BY ALL; 0 UInt64 0 Nullable(UInt64) Join_1_Value_0 Nullable(String) 0 UInt64 Join_2_Value_0 String 1 UInt64 1 Nullable(UInt64) Join_1_Value_1 Nullable(String) 1 UInt64 Join_2_Value_1 String 3 UInt64 \N Nullable(UInt64) \N Nullable(String) 3 UInt64 Join_2_Value_3 String @@ -53,7 +53,7 @@ SELECT '--'; -- SELECT id AS using_id, toTypeName(using_id), t1.id AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), t2.id AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value) -FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 USING (id); +FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 USING (id) ORDER BY ALL; 0 Nullable(UInt64) 0 Nullable(UInt64) Join_1_Value_0 Nullable(String) 0 Nullable(UInt64) Join_2_Value_0 Nullable(String) 1 Nullable(UInt64) 1 Nullable(UInt64) Join_1_Value_1 Nullable(String) 1 Nullable(UInt64) Join_2_Value_1 Nullable(String) 2 Nullable(UInt64) 2 Nullable(UInt64) Join_1_Value_2 Nullable(String) \N Nullable(UInt64) \N Nullable(String) diff --git a/tests/queries/0_stateless/02373_analyzer_join_use_nulls.sql b/tests/queries/0_stateless/02373_analyzer_join_use_nulls.sql index db7895084e8..bcec6d178a8 100644 --- a/tests/queries/0_stateless/02373_analyzer_join_use_nulls.sql +++ b/tests/queries/0_stateless/02373_analyzer_join_use_nulls.sql @@ -26,46 +26,46 @@ INSERT INTO test_table_join_2 VALUES (3, 'Join_2_Value_3'); -- { echoOn } SELECT t1.id AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), t2.id AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value) -FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id; +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id ORDER BY ALL; SELECT '--'; SELECT t1.id AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), t2.id AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value) -FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON t1.id = t2.id; +FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON t1.id = t2.id ORDER BY ALL; SELECT '--'; SELECT t1.id AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), t2.id AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value) -FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON t1.id = t2.id; +FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON t1.id = t2.id ORDER BY ALL; SELECT '--'; SELECT t1.id AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), t2.id AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value) -FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON t1.id = t2.id; +FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON t1.id = t2.id ORDER BY ALL; SELECT '--'; SELECT id AS using_id, toTypeName(using_id), t1.id AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), t2.id AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value) -FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 USING (id); +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 USING (id) ORDER BY ALL; SELECT '--'; SELECT id AS using_id, toTypeName(using_id), t1.id AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), t2.id AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value) -FROM test_table_join_1 AS t1 
LEFT JOIN test_table_join_2 AS t2 USING (id); +FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 USING (id) ORDER BY ALL; SELECT '--'; SELECT id AS using_id, toTypeName(using_id), t1.id AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), t2.id AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value) -FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 USING (id); +FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 USING (id) ORDER BY ALL; SELECT '--'; SELECT id AS using_id, toTypeName(using_id), t1.id AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), t2.id AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value) -FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 USING (id); +FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 USING (id) ORDER BY ALL; -- { echoOff } diff --git a/tests/queries/0_stateless/02992_all_columns_should_have_comment.sql b/tests/queries/0_stateless/02992_all_columns_should_have_comment.sql index 127c6fee07d..dcb7c09a973 100644 --- a/tests/queries/0_stateless/02992_all_columns_should_have_comment.sql +++ b/tests/queries/0_stateless/02992_all_columns_should_have_comment.sql @@ -1,4 +1,8 @@ SYSTEM FLUSH LOGS; SELECT 'Column ' || name || ' from table ' || concat(database, '.', table) || ' should have a comment' FROM system.columns -WHERE (database = 'system') AND (comment = '') AND (table NOT ILIKE '%_log_%') AND (table NOT IN ('numbers', 'numbers_mt', 'one', 'generate_series', 'generateSeries', 'coverage_log')) AND (default_kind != 'ALIAS'); +WHERE (database = 'system') AND + (comment = '') AND + (table NOT ILIKE '%_log_%') AND + (table NOT IN ('numbers', 'numbers_mt', 'one', 'generate_series', 'generateSeries', 'coverage_log', 'filesystem_read_prefetches_log')) AND + (default_kind != 'ALIAS'); From a493e5c8e7e885cc8b66626ebf2911a6e3387b78 Mon Sep 17 00:00:00 2001 From: vdimir Date: Mon, 22 Jul 2024 11:05:37 +0000 Subject: [PATCH 120/661] Followup #66725 --- .../Passes/LogicalExpressionOptimizerPass.cpp | 11 +++++++++-- .../02911_join_on_nullsafe_optimization.reference | 2 ++ .../02911_join_on_nullsafe_optimization.sql | 2 ++ 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/src/Analyzer/Passes/LogicalExpressionOptimizerPass.cpp b/src/Analyzer/Passes/LogicalExpressionOptimizerPass.cpp index 5c68bca3a6e..e136440556f 100644 --- a/src/Analyzer/Passes/LogicalExpressionOptimizerPass.cpp +++ b/src/Analyzer/Passes/LogicalExpressionOptimizerPass.cpp @@ -68,10 +68,13 @@ QueryTreeNodePtr findEqualsFunction(const QueryTreeNodes & nodes) return nullptr; } -/// Checks if the node is combination of isNull and notEquals functions of two the same arguments +/// Checks if the node is combination of isNull and notEquals functions of two the same arguments: +/// [ (a <> b AND) ] (a IS NULL) AND (b IS NULL) bool matchIsNullOfTwoArgs(const QueryTreeNodes & nodes, QueryTreeNodePtr & lhs, QueryTreeNodePtr & rhs) { QueryTreeNodePtrWithHashSet all_arguments; + QueryTreeNodePtrWithHashSet is_null_arguments; + for (const auto & node : nodes) { const auto * func_node = node->as(); @@ -80,7 +83,11 @@ bool matchIsNullOfTwoArgs(const QueryTreeNodes & nodes, QueryTreeNodePtr & lhs, const auto & arguments = func_node->getArguments().getNodes(); if (func_node->getFunctionName() == "isNull" && arguments.size() == 1) + { all_arguments.insert(QueryTreeNodePtrWithHash(arguments[0])); + is_null_arguments.insert(QueryTreeNodePtrWithHash(arguments[0])); + } + 
else if (func_node->getFunctionName() == "notEquals" && arguments.size() == 2) { if (arguments[0]->isEqual(*arguments[1])) @@ -95,7 +102,7 @@ bool matchIsNullOfTwoArgs(const QueryTreeNodes & nodes, QueryTreeNodePtr & lhs, return false; } - if (all_arguments.size() != 2) + if (all_arguments.size() != 2 || is_null_arguments.size() != 2) return false; lhs = all_arguments.begin()->node; diff --git a/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.reference b/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.reference index 31a1cda18e7..8f194b4ffde 100644 --- a/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.reference +++ b/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.reference @@ -39,6 +39,8 @@ SELECT * FROM t1 JOIN t2 ON (t1.x == t2.x AND ((t2.x IS NOT NULL) AND (t1.x IS N 2 2 2 2 3 3 3 33 \N \N \N \N +SELECT * FROM t1 JOIN t2 ON (t1.x == t2.x AND ((t2.x IS NOT NULL) AND (t1.x IS NOT NULL)) ) OR ( t2.x <> t1.x AND t2.x <> t1.x ) ORDER BY t1.x NULLS LAST; -- { serverError INVALID_JOIN_ON_EXPRESSION } +SELECT * FROM t1 JOIN t2 ON (t1.x == t2.x AND ((t2.x IS NOT NULL) AND (t1.x IS NOT NULL)) ) OR ( t2.x <> t1.x AND (t2.x IS NULL) AND (t2.x IS NULL) ) ORDER BY t1.x NULLS LAST; -- { serverError INVALID_JOIN_ON_EXPRESSION } -- aliases defined in the join condition are valid -- FIXME(@vdimir) broken query formatting for the following queries: -- SELECT *, e, e2 FROM t1 FULL JOIN t2 ON ( ( ((t1.x == t2.x) AS e) AND ((t2.x IS NOT NULL) AND (t1.x IS NOT NULL)) ) OR ( (t2.x IS NULL) AND (t1.x IS NULL) ) AS e2 ) ORDER BY t1.x NULLS LAST, t2.x NULLS LAST; diff --git a/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.sql b/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.sql index f739259caf9..18cb303a54a 100644 --- a/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.sql +++ b/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.sql @@ -35,6 +35,8 @@ SELECT x = y OR (x IS NULL AND y IS NULL) FROM t1 ORDER BY x NULLS LAST; SELECT * FROM t1 JOIN t2 ON (t1.x == t2.x AND ((t2.x IS NOT NULL) AND (t1.x IS NOT NULL)) ) OR ( (t2.x IS NULL) AND (t1.x IS NULL) ) ORDER BY t1.x NULLS LAST; SELECT * FROM t1 JOIN t2 ON (t1.x == t2.x AND ((t2.x IS NOT NULL) AND (t1.x IS NOT NULL)) ) OR ( t2.x <> t1.x AND (t2.x IS NULL) AND (t1.x IS NULL) ) ORDER BY t1.x NULLS LAST; +SELECT * FROM t1 JOIN t2 ON (t1.x == t2.x AND ((t2.x IS NOT NULL) AND (t1.x IS NOT NULL)) ) OR ( t2.x <> t1.x AND t2.x <> t1.x ) ORDER BY t1.x NULLS LAST; -- { serverError INVALID_JOIN_ON_EXPRESSION } +SELECT * FROM t1 JOIN t2 ON (t1.x == t2.x AND ((t2.x IS NOT NULL) AND (t1.x IS NOT NULL)) ) OR ( t2.x <> t1.x AND (t2.x IS NULL) AND (t2.x IS NULL) ) ORDER BY t1.x NULLS LAST; -- { serverError INVALID_JOIN_ON_EXPRESSION } -- aliases defined in the join condition are valid -- FIXME(@vdimir) broken query formatting for the following queries: From 2dc264928f311e2f4d10001044d070b6a6a05471 Mon Sep 17 00:00:00 2001 From: Yatsishin Ilya <2159081+qoega@users.noreply.github.com> Date: Mon, 22 Jul 2024 11:33:51 +0000 Subject: [PATCH 121/661] Added tests, rewritten logic which engines and table functions to allow, added replace for create table ... AS table_function() syntax. 
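
A minimal usage sketch (it mirrors the integration test added below, so the
`backups` disk, the `1/` backup path and the `replicated` database are just the
values from the test configs and should be read as placeholders):

```sql
-- Restore on a host that must not reach the original MySQL/PostgreSQL/... services:
-- external table engines and table functions are recreated with the Null engine,
-- regular tables (e.g. MergeTree) are restored as usual.
RESTORE DATABASE replicated FROM Disk('backups', '1/')
SETTINGS allow_different_database_def = 1, allow_different_table_def = 1
SETTINGS restore_replace_external_engines_to_null = 1, restore_replace_external_table_functions_to_null = 1;
```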
---
 docs/en/operations/settings/settings.md       |  12 ++
 src/Core/Settings.h                           |   3 +-
 src/Core/SettingsChangesHistory.cpp           |   3 +-
 src/Interpreters/InterpreterCreateQuery.cpp   |  50 +++---
 .../test_restore_external_engines/__init__.py |   0
 .../configs/backups_disk.xml                  |  14 ++
 .../configs/remote_servers.xml                |  21 +++
 .../test_restore_external_engines/test.py     | 143 ++++++++++++++++++
 8 files changed, 217 insertions(+), 29 deletions(-)
 create mode 100644 tests/integration/test_restore_external_engines/__init__.py
 create mode 100644 tests/integration/test_restore_external_engines/configs/backups_disk.xml
 create mode 100644 tests/integration/test_restore_external_engines/configs/remote_servers.xml
 create mode 100644 tests/integration/test_restore_external_engines/test.py

diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md
index c3f697c3bdc..65b8df7a9e2 100644
--- a/docs/en/operations/settings/settings.md
+++ b/docs/en/operations/settings/settings.md
@@ -5608,3 +5608,15 @@ Default value: `10000000`.
 Minimal size of block to compress in CROSS JOIN. Zero value means - disable this threshold. This block is compressed when any of the two thresholds (by rows or by bytes) are reached.
 
 Default value: `1GiB`.
+
+## restore_replace_external_engines_to_null
+
+For testing purposes. Replaces all external table engines with Null so that no external connections are initiated.
+
+Default value: `False`
+
+## restore_replace_external_table_functions_to_null
+
+For testing purposes. Replaces all external table functions with Null so that no external connections are initiated.
+
+Default value: `False`
\ No newline at end of file
diff --git a/src/Core/Settings.h b/src/Core/Settings.h
index 7bf97896357..e6d2cac359b 100644
--- a/src/Core/Settings.h
+++ b/src/Core/Settings.h
@@ -891,7 +891,8 @@ class IColumn;
     M(Bool, optimize_distinct_in_order, true, "Enable DISTINCT optimization if some columns in DISTINCT form a prefix of sorting. For example, prefix of sorting key in merge tree or ORDER BY statement", 0) \
     M(Bool, keeper_map_strict_mode, false, "Enforce additional checks during operations on KeeperMap. E.g. throw an exception on an insert for already existing key", 0) \
     M(UInt64, extract_key_value_pairs_max_pairs_per_row, 1000, "Max number of pairs that can be produced by the `extractKeyValuePairs` function. Used as a safeguard against consuming too much memory.", 0) ALIAS(extract_kvp_max_pairs_per_row) \
-    M(Bool, restore_replace_external_engine_to_null, false, "Replace all the External table engines to Null on restore. Useful for testing purposes", 0) \
+    M(Bool, restore_replace_external_engines_to_null, false, "Replace all the External table engines to Null on restore. Useful for testing purposes", 0) \
+    M(Bool, restore_replace_external_table_functions_to_null, false, "Replace all table functions to Null on restore.
Useful for testing purposes", 0) \ \ \ /* ###################################### */ \ diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index a23d9d17da2..0abcfb0cfb9 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -79,7 +79,8 @@ static std::initializer_listno_empty_args = true; storage.set(storage.engine, engine_ast); } + } void InterpreterCreateQuery::setEngine(ASTCreateQuery & create) const { + if (create.as_table_function) + { + if (getContext()->getSettingsRef().restore_replace_external_table_functions_to_null) + { + const auto & factory = TableFunctionFactory::instance(); + + auto properties = factory.tryGetProperties(create.as_table_function->as()->name); + if (properties && properties->allow_readonly) + return; + if (!create.storage) + { + auto storage_ast = std::make_shared(); + create.set(create.storage, storage_ast); + } + else + throw Exception(ErrorCodes::LOGICAL_ERROR, "Storage should not be created yet, it's a bug."); + create.as_table_function = nullptr; + setNullTableEngine(*create.storage); + } return; + } if (create.is_dictionary || create.is_ordinary_view || create.is_live_view || create.is_window_view) return; @@ -1010,34 +1031,9 @@ void InterpreterCreateQuery::setEngine(ASTCreateQuery & create) const setDefaultTableEngine(*create.storage, getContext()->getSettingsRef().default_table_engine.value); /// For exrternal tables with restore_replace_external_engine_to_null setting we replace external engines to /// Null table engine. - else if (create.storage->engine->name == "AzureBlobStorage" || - create.storage->engine->name == "AzureQueue" || - create.storage->engine->name == "COSN" || - create.storage->engine->name == "DeltaLake" || - create.storage->engine->name == "Dictionary" || - create.storage->engine->name == "Executable" || - create.storage->engine->name == "ExecutablePool" || - create.storage->engine->name == "ExternalDistributed" || - create.storage->engine->name == "File" || - create.storage->engine->name == "Hudi" || - create.storage->engine->name == "Iceberg" || - create.storage->engine->name == "JDBC" || - create.storage->engine->name == "Kafka" || - create.storage->engine->name == "MaterializedPostgreSQL" || - create.storage->engine->name == "MongoDB" || - create.storage->engine->name == "MySQL" || - create.storage->engine->name == "NATS" || - create.storage->engine->name == "ODBC" || - create.storage->engine->name == "OSS" || - create.storage->engine->name == "PostgreSQL" || - create.storage->engine->name == "RabbitMQ" || - create.storage->engine->name == "Redis" || - create.storage->engine->name == "S3" || - create.storage->engine->name == "S3Queue" || - create.storage->engine->name == "TinyLog" || - create.storage->engine->name == "URL") + else if (getContext()->getSettingsRef().restore_replace_external_engines_to_null) { - if (getContext()->getSettingsRef().restore_replace_external_engine_to_null) + if (StorageFactory::instance().getStorageFeatures(create.storage->engine->name).source_access_type != AccessType::NONE) setNullTableEngine(*create.storage); } return; diff --git a/tests/integration/test_restore_external_engines/__init__.py b/tests/integration/test_restore_external_engines/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_restore_external_engines/configs/backups_disk.xml b/tests/integration/test_restore_external_engines/configs/backups_disk.xml new file mode 100644 index 00000000000..f7d666c6542 --- /dev/null 
+++ b/tests/integration/test_restore_external_engines/configs/backups_disk.xml @@ -0,0 +1,14 @@ + + + + + local + /backups/ + + + + + backups + /backups/ + + diff --git a/tests/integration/test_restore_external_engines/configs/remote_servers.xml b/tests/integration/test_restore_external_engines/configs/remote_servers.xml new file mode 100644 index 00000000000..76ad3618339 --- /dev/null +++ b/tests/integration/test_restore_external_engines/configs/remote_servers.xml @@ -0,0 +1,21 @@ + + + + + true + + replica1 + 9000 + + + replica2 + 9000 + + + replica3 + 9000 + + + + + diff --git a/tests/integration/test_restore_external_engines/test.py b/tests/integration/test_restore_external_engines/test.py new file mode 100644 index 00000000000..cde4b0deb00 --- /dev/null +++ b/tests/integration/test_restore_external_engines/test.py @@ -0,0 +1,143 @@ +import pytest + +import pymysql.cursors +import pytest +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) +configs = ["configs/remote_servers.xml", "configs/backups_disk.xml"] + +node1 = cluster.add_instance("replica1", with_zookeeper=True, with_mysql8=True, main_configs=configs, external_dirs=["/backups/"]) +node2 = cluster.add_instance("replica2", with_zookeeper=True, with_mysql8=True, main_configs=configs, external_dirs=["/backups/"]) +node3 = cluster.add_instance("replica3", with_zookeeper=True, with_mysql8=True, main_configs=configs, external_dirs=["/backups/"]) +nodes = [node1, node2, node3] + +backup_id_counter = 0 + +def new_backup_name(): + global backup_id_counter + backup_id_counter += 1 + return f"Disk('backups', '{backup_id_counter}/')" + +def cleanup_nodes(nodes, dbname): + for node in nodes: + node.query(f"DROP DATABASE IF EXISTS {dbname} SYNC") + +def fill_nodes(nodes, dbname): + cleanup_nodes(nodes, dbname) + for node in nodes: + node.query(f"CREATE DATABASE {dbname} ENGINE = Replicated('/clickhouse/databases/{dbname}', 'default', '{node.name}')") + +def drop_mysql_table(conn, tableName): + with conn.cursor() as cursor: + cursor.execute(f"DROP TABLE IF EXISTS `clickhouse`.`{tableName}`") + +def get_mysql_conn(cluster): + conn = pymysql.connect( + user="root", password="clickhouse", host=cluster.mysql8_ip, port=cluster.mysql8_port + ) + return conn + +def fill_tables(cluster, dbname): + fill_nodes(nodes, dbname) + + conn = get_mysql_conn(cluster) + + with conn.cursor() as cursor: + cursor.execute( + "DROP DATABASE IF EXISTS clickhouse" + ) + cursor.execute( + "CREATE DATABASE clickhouse" + ) + cursor.execute( + "DROP TABLE IF EXISTS clickhouse.inference_table" + ) + cursor.execute( + "CREATE TABLE clickhouse.inference_table (id INT PRIMARY KEY, data BINARY(16) NOT NULL)" + ) + cursor.execute( + "INSERT INTO clickhouse.inference_table VALUES (100, X'9fad5e9eefdfb449')" + ) + conn.commit() + + parameters = "'mysql80:3306', 'clickhouse', 'inference_table', 'root', 'clickhouse'" + + node1.query( + f"CREATE TABLE {dbname}.mysql_schema_inference_engine ENGINE=MySQL({parameters})" + ) + node1.query(f"CREATE TABLE {dbname}.mysql_schema_inference_function AS mysql({parameters})") + + node1.query(f"CREATE TABLE {dbname}.merge_tree (id UInt64, b String) ORDER BY id") + node1.query(f"INSERT INTO {dbname}.merge_tree VALUES (100, 'abc')") + + expected = "id\tInt32\t\t\t\t\t\ndata\tFixedString(16)\t\t\t\t\t\n" + assert node1.query(f"DESCRIBE TABLE {dbname}.mysql_schema_inference_engine") == expected + assert node1.query(f"DESCRIBE TABLE {dbname}.mysql_schema_inference_function") == expected + assert 
node1.query(f"SELECT id FROM mysql({parameters})") == "100\n" + assert node1.query(f"SELECT id FROM {dbname}.mysql_schema_inference_engine") == "100\n" + assert node1.query(f"SELECT id FROM {dbname}.mysql_schema_inference_function") == "100\n" + assert node1.query(f"SELECT id FROM {dbname}.merge_tree") == "100\n" + + +@pytest.fixture(scope="module") +def start_cluster(): + try: + cluster.start() + yield cluster + + except Exception as ex: + print(ex) + + finally: + cluster.shutdown() + +def test_restore_table(start_cluster): + fill_tables(cluster, "replicated") + backup_name = new_backup_name() + node2.query(f"SYSTEM SYNC DATABASE REPLICA replicated;") + + node2.query(f"BACKUP DATABASE replicated TO {backup_name}") + + node2.query("DROP TABLE replicated.mysql_schema_inference_engine") + node2.query("DROP TABLE replicated.mysql_schema_inference_function") + + node3.query(f"SYSTEM SYNC DATABASE REPLICA replicated;") + + assert node3.query("EXISTS replicated.mysql_schema_inference_engine") == "0\n" + assert node3.query("EXISTS replicated.mysql_schema_inference_function") == "0\n" + + node3.query(f"RESTORE DATABASE replicated FROM {backup_name} SETTINGS allow_different_database_def=true") + node1.query(f"SYSTEM SYNC DATABASE REPLICA replicated;") + + assert node1.query("SELECT count(), sum(id) FROM replicated.mysql_schema_inference_engine") == "1\t100\n" + assert node1.query("SELECT count(), sum(id) FROM replicated.mysql_schema_inference_function") == "1\t100\n" + assert node1.query("SELECT count(), sum(id) FROM replicated.merge_tree") == "1\t100\n" + cleanup_nodes(nodes, "replicated") + + +def test_restore_table_null(start_cluster): + fill_tables(cluster, "replicated2") + + backup_name = new_backup_name() + node2.query(f"SYSTEM SYNC DATABASE REPLICA replicated2;") + + node2.query(f"BACKUP DATABASE replicated2 TO {backup_name}") + + node2.query("DROP TABLE replicated2.mysql_schema_inference_engine") + node2.query("DROP TABLE replicated2.mysql_schema_inference_function") + + node3.query(f"SYSTEM SYNC DATABASE REPLICA replicated2;") + + assert node3.query("EXISTS replicated2.mysql_schema_inference_engine") == "0\n" + assert node3.query("EXISTS replicated2.mysql_schema_inference_function") == "0\n" + + node3.query(f"RESTORE DATABASE replicated2 FROM {backup_name} SETTINGS allow_different_database_def=1, allow_different_table_def=1 SETTINGS restore_replace_external_engine_to_null=1, restore_replace_external_table_functions_to_null=1") + node1.query(f"SYSTEM SYNC DATABASE REPLICA replicated2;") + + assert node1.query("SELECT count(), sum(id) FROM replicated2.mysql_schema_inference_engine") == "0\t0\n" + assert node1.query("SELECT count(), sum(id) FROM replicated2.mysql_schema_inference_function") == "0\t0\n" + assert node1.query("SELECT count(), sum(id) FROM replicated2.merge_tree") == "1\t100\n" + assert node1.query("SELECT engine FROM system.tables where database = 'replicated2' and name like '%mysql%'") == "Null\nNull\n" + assert node1.query("SELECT engine FROM system.tables where database = 'replicated2' and name like '%merge_tree%'") == "MergeTree\n" + cleanup_nodes(nodes, "replicated2") From af4c2fa8a405c53d7de6d9ed41d63988caf22b04 Mon Sep 17 00:00:00 2001 From: Yatsishin Ilya <2159081+qoega@users.noreply.github.com> Date: Mon, 22 Jul 2024 12:27:24 +0000 Subject: [PATCH 122/661] fix --- tests/integration/test_restore_external_engines/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_restore_external_engines/test.py 
b/tests/integration/test_restore_external_engines/test.py index cde4b0deb00..be2cae334e2 100644 --- a/tests/integration/test_restore_external_engines/test.py +++ b/tests/integration/test_restore_external_engines/test.py @@ -132,7 +132,7 @@ def test_restore_table_null(start_cluster): assert node3.query("EXISTS replicated2.mysql_schema_inference_engine") == "0\n" assert node3.query("EXISTS replicated2.mysql_schema_inference_function") == "0\n" - node3.query(f"RESTORE DATABASE replicated2 FROM {backup_name} SETTINGS allow_different_database_def=1, allow_different_table_def=1 SETTINGS restore_replace_external_engine_to_null=1, restore_replace_external_table_functions_to_null=1") + node3.query(f"RESTORE DATABASE replicated2 FROM {backup_name} SETTINGS allow_different_database_def=1, allow_different_table_def=1 SETTINGS restore_replace_external_engines_to_null=1, restore_replace_external_table_functions_to_null=1") node1.query(f"SYSTEM SYNC DATABASE REPLICA replicated2;") assert node1.query("SELECT count(), sum(id) FROM replicated2.mysql_schema_inference_engine") == "0\t0\n" From ae5eccbf20b7198d6a3cc908e0186a384aba038a Mon Sep 17 00:00:00 2001 From: jsc0218 Date: Mon, 22 Jul 2024 13:39:48 +0000 Subject: [PATCH 123/661] just a commit to trigger CI --- .../test_grant_and_revoke/test_without_table_engine_grant.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/integration/test_grant_and_revoke/test_without_table_engine_grant.py b/tests/integration/test_grant_and_revoke/test_without_table_engine_grant.py index 210bb8ec465..4a5dfb83f79 100644 --- a/tests/integration/test_grant_and_revoke/test_without_table_engine_grant.py +++ b/tests/integration/test_grant_and_revoke/test_without_table_engine_grant.py @@ -60,6 +60,7 @@ def test_table_engine_and_source_grant(): ) # expecting grant POSTGRES instead of grant PostgreSQL due to discrepancy between source access type and table engine + # similarily, other sources should also use their own defined name instead of the name of table engine assert "grant POSTGRES ON *.*" in instance.query_and_get_error( """ CREATE TABLE test.table1(a Integer) From a3dbf87df6a7b3105fdbc79cafe0b2ec2fa547d5 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Mon, 22 Jul 2024 16:19:16 +0200 Subject: [PATCH 124/661] Update convertFieldToType.cpp --- src/Interpreters/convertFieldToType.cpp | 38 ++++++++++++------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/src/Interpreters/convertFieldToType.cpp b/src/Interpreters/convertFieldToType.cpp index 9ee214f4415..b92cbae7b09 100644 --- a/src/Interpreters/convertFieldToType.cpp +++ b/src/Interpreters/convertFieldToType.cpp @@ -384,25 +384,25 @@ Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const ID { const auto & element_type = *(type_tuple->getElements()[i]); res[i] = convertFieldToType(src_tuple[i], element_type); - if (!res[i].isNull() || canContainNull(element_type)) - continue; - - /* - * Either the source element was Null, or the conversion did not - * succeed, because the source and the requested types of the - * element are compatible, but the value is not convertible - * (e.g. trying to convert -1 from Int8 to UInt8). In these - * cases, consider the whole tuple also compatible but not - * convertible. According to the specification of this function, - * we must return Null in this case. - * - * The following elements might be not even compatible, so it - * makes sense to check them to detect user errors. 
Remember - * that there is an unconvertible element, and try to process - * the remaining ones. The convertFieldToType for each element - * will throw if it detects incompatibility. - */ - have_unconvertible_element = true; + if (res[i].isNull() && !canContainNull(element_type)) + { + /* + * Either the source element was Null, or the conversion did not + * succeed, because the source and the requested types of the + * element are compatible, but the value is not convertible + * (e.g. trying to convert -1 from Int8 to UInt8). In these + * cases, consider the whole tuple also compatible but not + * convertible. According to the specification of this function, + * we must return Null in this case. + * + * The following elements might be not even compatible, so it + * makes sense to check them to detect user errors. Remember + * that there is an unconvertible element, and try to process + * the remaining ones. The convertFieldToType for each element + * will throw if it detects incompatibility. + */ + have_unconvertible_element = true; + } } return have_unconvertible_element ? Field(Null()) : Field(res); From 660530c611000f5eb8875c640d5aed196315a187 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 22 Jul 2024 17:10:39 +0200 Subject: [PATCH 125/661] Fix tidy --- src/Disks/IO/ReadBufferFromAzureBlobStorage.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/Disks/IO/ReadBufferFromAzureBlobStorage.cpp b/src/Disks/IO/ReadBufferFromAzureBlobStorage.cpp index a36a8b031b4..377f6b36888 100644 --- a/src/Disks/IO/ReadBufferFromAzureBlobStorage.cpp +++ b/src/Disks/IO/ReadBufferFromAzureBlobStorage.cpp @@ -258,10 +258,9 @@ std::optional ReadBufferFromAzureBlobStorage::tryGetFileSize() if (!blob_client) blob_client = std::make_unique(blob_container_client->GetBlobClient(path)); - if (file_size.has_value()) - return *file_size; + if (!file_size) + file_size = blob_client->GetProperties().Value.BlobSize; - file_size = blob_client->GetProperties().Value.BlobSize; return *file_size; } From 5fa2db8e4828b004ac10d625df62efcc8711dc98 Mon Sep 17 00:00:00 2001 From: Michael Stetsyuk Date: Mon, 22 Jul 2024 16:59:17 +0100 Subject: [PATCH 126/661] fix 01293_show_clusters stateless test --- tests/queries/0_stateless/01293_show_clusters.reference | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/01293_show_clusters.reference b/tests/queries/0_stateless/01293_show_clusters.reference index e140f207022..9569fcf2e37 100644 --- a/tests/queries/0_stateless/01293_show_clusters.reference +++ b/tests/queries/0_stateless/01293_show_clusters.reference @@ -1,3 +1,3 @@ test_shard_localhost -test_cluster_one_shard_two_replicas 1 1 0 1 127.0.0.1 127.0.0.1 9000 1 default -test_cluster_one_shard_two_replicas 1 1 0 2 127.0.0.2 127.0.0.2 9000 0 default +test_cluster_one_shard_two_replicas 1 1 0 1 127.0.0.1 127.0.0.1 9000 1 default 0 NULL +test_cluster_one_shard_two_replicas 1 1 0 2 127.0.0.2 127.0.0.2 9000 0 default 0 NULL From 2a893ed8cd10e6e3b7506f43b644b5037f96c49a Mon Sep 17 00:00:00 2001 From: Michael Stetsyuk Date: Mon, 22 Jul 2024 17:03:41 +0100 Subject: [PATCH 127/661] fxs --- src/IO/S3/Client.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/IO/S3/Client.cpp b/src/IO/S3/Client.cpp index db20420db9f..325c820f8bd 100644 --- a/src/IO/S3/Client.cpp +++ b/src/IO/S3/Client.cpp @@ -388,7 +388,8 @@ Model::HeadObjectOutcome Client::HeadObject(HeadObjectRequest & request) const if (isClientForDisk()) 
CurrentMetrics::add(CurrentMetrics::S3DiskNoKeyErrors); - return enrichErrorMessage(std::move(result)); + return enrichErrorMessage( + HeadObject(static_cast(request))); } /// For each request, we wrap the request functions from Aws::S3::Client with doRequest From 4b0b5b7f2dd66ecfd9cb0b533166a229e501cd52 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Mon, 22 Jul 2024 18:53:26 +0200 Subject: [PATCH 128/661] groupArrayIntersect: Fix internal name --- .../AggregateFunctionGroupArrayIntersect.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/AggregateFunctions/AggregateFunctionGroupArrayIntersect.cpp b/src/AggregateFunctions/AggregateFunctionGroupArrayIntersect.cpp index 20acda213da..591422adc57 100644 --- a/src/AggregateFunctions/AggregateFunctionGroupArrayIntersect.cpp +++ b/src/AggregateFunctions/AggregateFunctionGroupArrayIntersect.cpp @@ -69,7 +69,7 @@ public: : IAggregateFunctionDataHelper, AggregateFunctionGroupArrayIntersect>({argument_type}, parameters_, result_type_) {} - String getName() const override { return "GroupArrayIntersect"; } + String getName() const override { return "groupArrayIntersect"; } bool allocatesMemoryInArena() const override { return false; } @@ -213,7 +213,7 @@ public: : IAggregateFunctionDataHelper>({input_data_type_}, parameters_, result_type_) , input_data_type(result_type_) {} - String getName() const override { return "GroupArrayIntersect"; } + String getName() const override { return "groupArrayIntersect"; } bool allocatesMemoryInArena() const override { return true; } From 468bd551c64057fb056ed5434e68c26ded062a84 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Mon, 22 Jul 2024 19:02:20 +0200 Subject: [PATCH 129/661] Fix includes --- .../AggregateFunctionGroupArrayIntersect.cpp | 24 ++++++++----------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/src/AggregateFunctions/AggregateFunctionGroupArrayIntersect.cpp b/src/AggregateFunctions/AggregateFunctionGroupArrayIntersect.cpp index 591422adc57..1529cd5899a 100644 --- a/src/AggregateFunctions/AggregateFunctionGroupArrayIntersect.cpp +++ b/src/AggregateFunctions/AggregateFunctionGroupArrayIntersect.cpp @@ -1,12 +1,12 @@ -#include -#include - #include #include #include #include -#include +#include +#include +#include +#include #include #include @@ -15,18 +15,14 @@ #include #include -#include -#include - #include #include -#include #include -#include -#include -#include -#include +#include +#include + +#include namespace DB @@ -240,7 +236,7 @@ public: { const char * begin = nullptr; StringRef serialized = data_column->serializeValueIntoArena(offset + i, *arena, begin); - assert(serialized.data != nullptr); + chassert(serialized.data != nullptr); set.emplace(SerializedKeyHolder{serialized, *arena}, it, inserted); } } @@ -260,7 +256,7 @@ public: { const char * begin = nullptr; StringRef serialized = data_column->serializeValueIntoArena(offset + i, *arena, begin); - assert(serialized.data != nullptr); + chassert(serialized.data != nullptr); it = set.find(serialized); if (it != nullptr) From 078bddd65e0978c3feb2f8d77008f3c23bbc136f Mon Sep 17 00:00:00 2001 From: joelynch Date: Mon, 22 Jul 2024 18:55:23 +0200 Subject: [PATCH 130/661] Let window view take comments --- src/Parsers/ParserCreateQuery.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/Parsers/ParserCreateQuery.cpp b/src/Parsers/ParserCreateQuery.cpp index 014dc7bd3bf..3e908ea3602 100644 --- a/src/Parsers/ParserCreateQuery.cpp +++ 
b/src/Parsers/ParserCreateQuery.cpp @@ -1176,6 +1176,7 @@ bool ParserCreateWindowViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & if (!select_p.parse(pos, select, expected)) return false; + auto comment = parseComment(pos, expected); auto query = std::make_shared(); node = query; @@ -1194,6 +1195,8 @@ bool ParserCreateWindowViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & query->children.push_back(query->database); if (query->table) query->children.push_back(query->table); + if (comment) + query->set(query->comment, comment); if (to_table) query->to_table_id = to_table->as()->getTableId(); From 240f04561e1ebca2fb1823a6ee34d4f2c15d1e66 Mon Sep 17 00:00:00 2001 From: joelynch Date: Mon, 22 Jul 2024 19:07:46 +0200 Subject: [PATCH 131/661] Fix docs for COMMENT clause --- docs/en/sql-reference/statements/create/table.md | 7 +------ docs/en/sql-reference/statements/create/view.md | 8 +++++++- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/docs/en/sql-reference/statements/create/table.md b/docs/en/sql-reference/statements/create/table.md index b866d0b9f5f..9c8984d698f 100644 --- a/docs/en/sql-reference/statements/create/table.md +++ b/docs/en/sql-reference/statements/create/table.md @@ -21,7 +21,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] name2 [type2] [NULL|NOT NULL] [DEFAULT|MATERIALIZED|EPHEMERAL|ALIAS expr2] [COMMENT 'comment for column'] [compression_codec] [TTL expr2], ... ) ENGINE = engine - COMMENT 'comment for table' + [COMMENT 'comment for table'] ``` Creates a table named `table_name` in the `db` database or the current database if `db` is not set, with the structure specified in brackets and the `engine` engine. @@ -626,11 +626,6 @@ SELECT * FROM base.t1; You can add a comment to the table when you creating it. -:::note -The comment clause is supported by all table engines except [Kafka](../../../engines/table-engines/integrations/kafka.md), [RabbitMQ](../../../engines/table-engines/integrations/rabbitmq.md) and [EmbeddedRocksDB](../../../engines/table-engines/integrations/embedded-rocksdb.md). -::: - - **Syntax** ``` sql diff --git a/docs/en/sql-reference/statements/create/view.md b/docs/en/sql-reference/statements/create/view.md index 1fabb6d8cc7..2931f7020fb 100644 --- a/docs/en/sql-reference/statements/create/view.md +++ b/docs/en/sql-reference/statements/create/view.md @@ -16,6 +16,7 @@ Syntax: CREATE [OR REPLACE] VIEW [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster_name] [DEFINER = { user | CURRENT_USER }] [SQL SECURITY { DEFINER | INVOKER | NONE }] AS SELECT ... +[COMMENT 'comment'] ``` Normal views do not store any data. They just perform a read from another table on each access. In other words, a normal view is nothing more than a saved query. When reading from a view, this saved query is used as a subquery in the [FROM](../../../sql-reference/statements/select/from.md) clause. @@ -57,6 +58,7 @@ SELECT * FROM view(column1=value1, column2=value2 ...) CREATE MATERIALIZED VIEW [IF NOT EXISTS] [db.]table_name [ON CLUSTER] [TO[db.]name] [ENGINE = engine] [POPULATE] [DEFINER = { user | CURRENT_USER }] [SQL SECURITY { DEFINER | INVOKER | NONE }] AS SELECT ... +[COMMENT 'comment'] ``` :::tip @@ -161,6 +163,7 @@ RANDOMIZE FOR interval DEPENDS ON [db.]name [, [db.]name [, ...]] [TO[db.]name] [(columns)] [ENGINE = engine] [EMPTY] AS SELECT ... 
+[COMMENT 'comment'] ``` where `interval` is a sequence of simple intervals: ```sql @@ -267,7 +270,10 @@ This is an experimental feature that may change in backwards-incompatible ways i ::: ``` sql -CREATE WINDOW VIEW [IF NOT EXISTS] [db.]table_name [TO [db.]table_name] [INNER ENGINE engine] [ENGINE engine] [WATERMARK strategy] [ALLOWED_LATENESS interval_function] [POPULATE] AS SELECT ... GROUP BY time_window_function +CREATE WINDOW VIEW [IF NOT EXISTS] [db.]table_name [TO [db.]table_name] [INNER ENGINE engine] [ENGINE engine] [WATERMARK strategy] [ALLOWED_LATENESS interval_function] [POPULATE] +AS SELECT ... +GROUP BY time_window_function +[COMMENT 'comment'] ``` Window view can aggregate data by time window and output the results when the window is ready to fire. It stores the partial aggregation results in an inner(or specified) table to reduce latency and can push the processing result to a specified table or push notifications using the WATCH query. From 4a2708658d6d9b3ab0de9eab41d4e97bf04c3523 Mon Sep 17 00:00:00 2001 From: joelynch Date: Mon, 22 Jul 2024 19:08:04 +0200 Subject: [PATCH 132/661] Add more tables to table comment test --- .../0_stateless/01821_table_comment.reference | 8 ++- .../0_stateless/01821_table_comment.sql | 53 +++++++++++++++++-- 2 files changed, 54 insertions(+), 7 deletions(-) diff --git a/tests/queries/0_stateless/01821_table_comment.reference b/tests/queries/0_stateless/01821_table_comment.reference index 05acabae3d4..cdd87df43d2 100644 --- a/tests/queries/0_stateless/01821_table_comment.reference +++ b/tests/queries/0_stateless/01821_table_comment.reference @@ -1,4 +1,8 @@ -t1 this is a temtorary table +t1 this is a temporary table t2 this is a MergeTree table t3 this is a Log table -CREATE TABLE default.t1\n(\n `n` Int8\n)\nENGINE = Memory\nCOMMENT \'this is a temtorary table\' +t4 this is a Kafka table +t5 this is a EmbeddedRocksDB table +t6 this is a Executable table +t7 this is a WindowView table +CREATE TABLE default.t1\n(\n `n` Int8\n)\nENGINE = Memory\nCOMMENT \'this is a temporary table\' diff --git a/tests/queries/0_stateless/01821_table_comment.sql b/tests/queries/0_stateless/01821_table_comment.sql index 4bd71d3e278..32b89af0750 100644 --- a/tests/queries/0_stateless/01821_table_comment.sql +++ b/tests/queries/0_stateless/01821_table_comment.sql @@ -9,7 +9,7 @@ CREATE TABLE t1 `n` Int8 ) ENGINE = Memory -COMMENT 'this is a temtorary table'; +COMMENT 'this is a temporary table'; CREATE TABLE t2 ( @@ -26,14 +26,57 @@ CREATE TABLE t3 ENGINE = Log COMMENT 'this is a Log table'; +CREATE TABLE t4 +( + `n` Int8 +) +ENGINE = Kafka +SETTINGS + kafka_broker_list = 'localhost:10000', + kafka_topic_list = 'test', + kafka_group_name = 'test', + kafka_format = 'JSONEachRow' +COMMENT 'this is a Kafka table'; + +CREATE TABLE t5 +( + `n` Int8 +) +ENGINE = EmbeddedRocksDB +PRIMARY KEY n +COMMENT 'this is a EmbeddedRocksDB table'; + +CREATE TABLE t6 +( + `n` Int8 +) +ENGINE = Executable('script.py', TabSeparated) +COMMENT 'this is a Executable table'; + +SET allow_experimental_window_view = 1; +-- New analyzer doesn't support WindowView tables +SET allow_experimental_analyzer = 0; + +CREATE WINDOW VIEW t7 +( + `n` Int8 +) +ENGINE MergeTree +ORDER BY n +AS SELECT 1 +GROUP BY tumble(now(), toIntervalDay('1')) +COMMENT 'this is a WindowView table'; + +SET allow_experimental_analyzer = 1; + SELECT name, comment FROM system.tables -WHERE name IN ('t1', 't2', 't3') AND database = currentDatabase() order by name; +WHERE name IN ('t1', 't2', 't3', 't4', 't5', 't6', 't7') 
+ AND database = currentDatabase() order by name; SHOW CREATE TABLE t1; -DROP TABLE t1; -DROP TABLE t2; -DROP TABLE t3; +DROP TABLE t1, t2, t3, t4, t5, t6; +DROP VIEW t7; From b1029fbd671310a4c8d48070f87d84f33f8842fd Mon Sep 17 00:00:00 2001 From: xogoodnow Date: Mon, 22 Jul 2024 22:02:54 +0330 Subject: [PATCH 133/661] Fixed the style issue --- src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.cpp b/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.cpp index e217d93975d..f51a7a913b8 100644 --- a/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.cpp +++ b/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.cpp @@ -37,6 +37,7 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; extern const int BAD_ARGUMENTS; extern const int BAD_QUERY_PARAMETER; + extern const int QUERY_NOT_ALLOWED; } namespace From 71cdf82643fb17b5b68003df314c54ebbca0842f Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Mon, 22 Jul 2024 18:41:30 +0000 Subject: [PATCH 134/661] Fix: reset is_async_state flag --- src/Processors/Sources/RemoteSource.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/Processors/Sources/RemoteSource.cpp b/src/Processors/Sources/RemoteSource.cpp index 46c27676e12..2f9a30296be 100644 --- a/src/Processors/Sources/RemoteSource.cpp +++ b/src/Processors/Sources/RemoteSource.cpp @@ -115,7 +115,10 @@ void RemoteSource::onAsyncJobReady() if (!was_query_sent) return; + chassert(!preprocessed_packet); preprocessed_packet = query_executor->processParallelReplicaPacketIfAny(); + if (preprocessed_packet) + is_async_state = false; } std::optional RemoteSource::tryGenerate() From 155b28227972fe2f33dac98d3c471c555637d246 Mon Sep 17 00:00:00 2001 From: joelynch Date: Mon, 22 Jul 2024 21:30:40 +0200 Subject: [PATCH 135/661] This cannot be fasttest because it uses Kafka engine --- tests/queries/0_stateless/01821_table_comment.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01821_table_comment.sql b/tests/queries/0_stateless/01821_table_comment.sql index 32b89af0750..4946e46d37a 100644 --- a/tests/queries/0_stateless/01821_table_comment.sql +++ b/tests/queries/0_stateless/01821_table_comment.sql @@ -1,4 +1,4 @@ --- Tags: no-parallel +-- Tags: no-parallel, no-fasttest DROP TABLE IF EXISTS t1; DROP TABLE IF EXISTS t2; From 7f5c58f599d34f690c4a04e4223a2f86a433d0e9 Mon Sep 17 00:00:00 2001 From: Michael Stetsyuk Date: Mon, 22 Jul 2024 20:58:56 +0100 Subject: [PATCH 136/661] fxs --- tests/queries/0_stateless/01293_show_clusters.reference | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/01293_show_clusters.reference b/tests/queries/0_stateless/01293_show_clusters.reference index 9569fcf2e37..e140f207022 100644 --- a/tests/queries/0_stateless/01293_show_clusters.reference +++ b/tests/queries/0_stateless/01293_show_clusters.reference @@ -1,3 +1,3 @@ test_shard_localhost -test_cluster_one_shard_two_replicas 1 1 0 1 127.0.0.1 127.0.0.1 9000 1 default 0 NULL -test_cluster_one_shard_two_replicas 1 1 0 2 127.0.0.2 127.0.0.2 9000 0 default 0 NULL +test_cluster_one_shard_two_replicas 1 1 0 1 127.0.0.1 127.0.0.1 9000 1 default +test_cluster_one_shard_two_replicas 1 1 0 2 127.0.0.2 127.0.0.2 9000 0 default From 72f4919fdad5217f48bd83e51ce2d1f3f083087b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mi=D1=81hael=20Stetsyuk?= <59827607+mstetsyuk@users.noreply.github.com> Date: Mon, 22 Jul 2024 
21:33:47 +0100 Subject: [PATCH 137/661] Update src/Common/CurrentMetrics.cpp Co-authored-by: Sema Checherinda <104093494+CheSema@users.noreply.github.com> --- src/Common/CurrentMetrics.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Common/CurrentMetrics.cpp b/src/Common/CurrentMetrics.cpp index 2fedba0175b..39198147794 100644 --- a/src/Common/CurrentMetrics.cpp +++ b/src/Common/CurrentMetrics.cpp @@ -307,7 +307,7 @@ M(FilteringMarksWithPrimaryKey, "Number of threads currently doing filtering of mark ranges by the primary key") \ M(FilteringMarksWithSecondaryKeys, "Number of threads currently doing filtering of mark ranges by secondary keys") \ \ - M(S3DiskNoKeyErrors, "Number of no-key S3 disk errors") \ + M(S3DiskNoKeyErrors, "The number of `NoSuchKey` errors that occur when reading data from S3 cloud storage through ClickHouse disks.") \ #ifdef APPLY_FOR_EXTERNAL_METRICS #define APPLY_FOR_METRICS(M) APPLY_FOR_BUILTIN_METRICS(M) APPLY_FOR_EXTERNAL_METRICS(M) From 8fb560d2575c121b252ab1e6d8e13f9486dc2b38 Mon Sep 17 00:00:00 2001 From: Andrey Zvonov Date: Tue, 23 Jul 2024 07:08:58 +0000 Subject: [PATCH 138/661] add replica sync --- tests/integration/test_broken_projections/test.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/integration/test_broken_projections/test.py b/tests/integration/test_broken_projections/test.py index e198f98e4c5..a565f47449f 100644 --- a/tests/integration/test_broken_projections/test.py +++ b/tests/integration/test_broken_projections/test.py @@ -433,6 +433,7 @@ def test_broken_ignored_replicated(cluster): check(node, table_name, 1) create_table(node, table_name2, 2, table_name) + node.query(f"system sync replica {table_name}") check(node, table_name2, 1) break_projection(node, table_name, "proj1", "all_0_0_0", "data") From d7ffbab7c4eab820b303bd80f6b52e856f4e1d47 Mon Sep 17 00:00:00 2001 From: vdimir Date: Tue, 23 Jul 2024 09:23:18 +0200 Subject: [PATCH 139/661] Add test from #66378 Co-authored-by: Azat Khuzhin --- src/Parsers/ASTTablesInSelectQuery.cpp | 1 + .../0_stateless/03204_format_join_on.reference | 4 ++++ tests/queries/0_stateless/03204_format_join_on.sh | 15 +++++++++++++++ 3 files changed, 20 insertions(+) create mode 100644 tests/queries/0_stateless/03204_format_join_on.reference create mode 100644 tests/queries/0_stateless/03204_format_join_on.sh diff --git a/src/Parsers/ASTTablesInSelectQuery.cpp b/src/Parsers/ASTTablesInSelectQuery.cpp index dbb2a008bae..b6d42513aa7 100644 --- a/src/Parsers/ASTTablesInSelectQuery.cpp +++ b/src/Parsers/ASTTablesInSelectQuery.cpp @@ -235,6 +235,7 @@ void ASTTableJoin::formatImplAfterTable(const FormatSettings & settings, FormatS else if (on_expression) { settings.ostr << (settings.hilite ? hilite_keyword : "") << " ON " << (settings.hilite ? 
hilite_none : ""); + /// If there is an alias for the whole expression parens should be added, otherwise it will be invalid syntax bool on_has_alias = !on_expression->tryGetAlias().empty(); if (on_has_alias) settings.ostr << "("; diff --git a/tests/queries/0_stateless/03204_format_join_on.reference b/tests/queries/0_stateless/03204_format_join_on.reference new file mode 100644 index 00000000000..846f36fcca4 --- /dev/null +++ b/tests/queries/0_stateless/03204_format_join_on.reference @@ -0,0 +1,4 @@ +SELECT * FROM t1 INNER JOIN t2 ON ((t1.x = t2.x) AND (t1.x IS NULL) AS e2) +SELECT * FROM t1 INNER JOIN t2 ON ((t1.x = t2.x) AND (t1.x IS NULL) AS e2) +SELECT * FROM t1 INNER JOIN t2 ON (t1.x = t2.x) AND ((t1.x IS NULL) AS e2) +SELECT * FROM t1 INNER JOIN t2 ON t1.x = t2.x diff --git a/tests/queries/0_stateless/03204_format_join_on.sh b/tests/queries/0_stateless/03204_format_join_on.sh new file mode 100644 index 00000000000..87b0afac042 --- /dev/null +++ b/tests/queries/0_stateless/03204_format_join_on.sh @@ -0,0 +1,15 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +# regression for the JOIN ON alias for the whole expression +phase1="$($CLICKHOUSE_FORMAT --oneline --query "SELECT * FROM t1 JOIN t2 ON ((t1.x = t2.x) AND (t1.x IS NULL) AS e2)")" +echo "$phase1" +# phase 2 +$CLICKHOUSE_FORMAT --oneline --query "$phase1" + +# other test cases +$CLICKHOUSE_FORMAT --oneline --query "SELECT * FROM t1 JOIN t2 ON (t1.x = t2.x) AND (t1.x IS NULL AS e2)" +$CLICKHOUSE_FORMAT --oneline --query "SELECT * FROM t1 JOIN t2 ON t1.x = t2.x" From 223eee3f46b07c38de3223fb56575f9ecbc5bea7 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Tue, 23 Jul 2024 07:57:03 +0000 Subject: [PATCH 140/661] Comment to new IProcessor method --- src/Processors/IProcessor.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/Processors/IProcessor.h b/src/Processors/IProcessor.h index 0776921a814..94e93595f4e 100644 --- a/src/Processors/IProcessor.h +++ b/src/Processors/IProcessor.h @@ -221,6 +221,21 @@ public: throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method 'schedule' is not implemented for {} processor", getName()); } + /* The method is called right after asynchronous job is done + * i.e. when file descriptor returned by schedule() is readable. + * The sequence of method calls: + * ... prepare() -> schedule() -> onAsyncJobReady() -> work() ... + * See also comment to schedule() method + * + * It allows doing some preprocessing immediately after asynchronous job is done. + * The implementation should return control quickly, to avoid blocking another asynchronous completed jobs + * created by the same pipeline. + * + * Example, scheduling tasks for remote workers (file descriptor in this case is a socket) + * When the remote worker asks for the next task, doing it in onAsyncJobReady() we can provide it immediately. + * Otherwise, the returning of the next task for the remote worker can be delayed by current work done in the pipeline + * i.e. processor->work(), which will create unnecessary latency in query processing by remote workers Not Committed Yet + */ virtual void onAsyncJobReady() {} /** You must call this method if 'prepare' returned ExpandPipeline. 
From b6ad57aa37f01ed4d101bd059b04222f361245ff Mon Sep 17 00:00:00 2001 From: Nikita Fomichev Date: Mon, 22 Jul 2024 19:25:34 +0200 Subject: [PATCH 141/661] Stateless tests: change status for failed tests in case of server crash --- .../util/process_functional_tests_result.py | 32 ++++++++++++------- 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/docker/test/util/process_functional_tests_result.py b/docker/test/util/process_functional_tests_result.py index 4442c9d7d9e..dbe50eeade0 100755 --- a/docker/test/util/process_functional_tests_result.py +++ b/docker/test/util/process_functional_tests_result.py @@ -12,6 +12,7 @@ UNKNOWN_SIGN = "[ UNKNOWN " SKIPPED_SIGN = "[ SKIPPED " HUNG_SIGN = "Found hung queries in processlist" SERVER_DIED_SIGN = "Server died, terminating all processes" +SERVER_DIED_SIGN2 = "Server does not respond to health check" DATABASE_SIGN = "Database: " SUCCESS_FINISH_SIGNS = ["All tests have finished", "No tests were run"] @@ -43,7 +44,7 @@ def process_test_log(log_path, broken_tests): if HUNG_SIGN in line: hung = True break - if SERVER_DIED_SIGN in line: + if SERVER_DIED_SIGN in line or SERVER_DIED_SIGN2 in line: server_died = True if RETRIES_SIGN in line: retries = True @@ -111,12 +112,12 @@ def process_test_log(log_path, broken_tests): # Python does not support TSV, so we have to escape '\t' and '\n' manually # and hope that complex escape sequences will not break anything test_results = [ - ( + [ test[0], test[1], test[2], "".join(test[3])[:4096].replace("\t", "\\t").replace("\n", "\\n"), - ) + ] for test in test_results ] @@ -170,18 +171,24 @@ def process_result(result_path, broken_tests): if hung: description = "Some queries hung, " state = "failure" - test_results.append(("Some queries hung", "FAIL", "0", "")) + test_results.append(["Some queries hung", "FAIL", "0", ""]) elif server_died: description = "Server died, " state = "failure" - test_results.append(("Server died", "FAIL", "0", "")) + # When ClickHouse server crashes, some tests are still running + # and fail because they cannot connect to server + for result in test_results: + if result[1] == "FAIL": + result[1] = "SERVER_DIED" + + test_results.append(["Server died", "FAIL", "0", ""]) elif not success_finish: description = "Tests are not finished, " state = "failure" - test_results.append(("Tests are not finished", "FAIL", "0", "")) + test_results.append(["Tests are not finished", "FAIL", "0", ""]) elif retries: description = "Some tests restarted, " - test_results.append(("Some tests restarted", "SKIPPED", "0", "")) + test_results.append(["Some tests restarted", "SKIPPED", "0", ""]) else: description = "" @@ -233,11 +240,12 @@ if __name__ == "__main__": # sort by status then by check name order = { "FAIL": 0, - "Timeout": 1, - "NOT_FAILED": 2, - "BROKEN": 3, - "OK": 4, - "SKIPPED": 5, + "SERVER_DIED": 1, + "Timeout": 2, + "NOT_FAILED": 3, + "BROKEN": 4, + "OK": 5, + "SKIPPED": 6, } return order.get(item[1], 10), str(item[0]), item[1] From 492dab5e5d4fb775d8f3551e990fc078929c2bd8 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 23 Jul 2024 01:52:01 +0200 Subject: [PATCH 142/661] Update tests/queries/0_stateless/02992_all_columns_should_have_comment.sql --- .../0_stateless/02992_all_columns_should_have_comment.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02992_all_columns_should_have_comment.sql b/tests/queries/0_stateless/02992_all_columns_should_have_comment.sql index dcb7c09a973..0d34b033354 100644 --- 
a/tests/queries/0_stateless/02992_all_columns_should_have_comment.sql +++ b/tests/queries/0_stateless/02992_all_columns_should_have_comment.sql @@ -3,6 +3,6 @@ SELECT 'Column ' || name || ' from table ' || concat(database, '.', table) || ' FROM system.columns WHERE (database = 'system') AND (comment = '') AND - (table NOT ILIKE '%_log_%') AND + (table NOT ILIKE '%\_log\_%') AND (table NOT IN ('numbers', 'numbers_mt', 'one', 'generate_series', 'generateSeries', 'coverage_log', 'filesystem_read_prefetches_log')) AND (default_kind != 'ALIAS'); From 932033fca9bdacbfdb544fac5389e03fa7732eeb Mon Sep 17 00:00:00 2001 From: Michael Stetsyuk Date: Tue, 23 Jul 2024 10:55:45 +0100 Subject: [PATCH 143/661] use atomic to avoid data race --- src/Databases/DatabaseReplicatedWorker.cpp | 12 +++++++++--- src/Databases/DatabaseReplicatedWorker.h | 5 +++-- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/src/Databases/DatabaseReplicatedWorker.cpp b/src/Databases/DatabaseReplicatedWorker.cpp index cea2d123f87..a9a74c5f56a 100644 --- a/src/Databases/DatabaseReplicatedWorker.cpp +++ b/src/Databases/DatabaseReplicatedWorker.cpp @@ -32,7 +32,8 @@ DatabaseReplicatedDDLWorker::DatabaseReplicatedDDLWorker(DatabaseReplicated * db bool DatabaseReplicatedDDLWorker::initializeMainThread() { - initialization_duration_timer.emplace(); + initialization_duration_timer.restart(); + initializing.store(true, std::memory_order_release); while (!stop_flag) { @@ -71,7 +72,7 @@ bool DatabaseReplicatedDDLWorker::initializeMainThread() initializeReplication(); initialized = true; - initialization_duration_timer.reset(); + initializing.store(false, std::memory_order_relaxed); return true; } catch (...) @@ -81,7 +82,7 @@ bool DatabaseReplicatedDDLWorker::initializeMainThread() } } - initialization_duration_timer.reset(); + initializing.store(false, std::memory_order_relaxed); return false; } @@ -463,4 +464,9 @@ UInt32 DatabaseReplicatedDDLWorker::getLogPointer() const return max_id.load(); } +UInt64 DatabaseReplicatedDDLWorker::getCurrentInitializationDurationMs() const +{ + return initializing.load(std::memory_order_acquire) ? initialization_duration_timer.elapsedMilliseconds() : 0; +} + } diff --git a/src/Databases/DatabaseReplicatedWorker.h b/src/Databases/DatabaseReplicatedWorker.h index aea3b71173d..3e5887be825 100644 --- a/src/Databases/DatabaseReplicatedWorker.h +++ b/src/Databases/DatabaseReplicatedWorker.h @@ -37,7 +37,7 @@ public: UInt32 getLogPointer() const; - UInt64 getCurrentInitializationDurationMs() const { return initialization_duration_timer ? 
initialization_duration_timer->elapsedMilliseconds() : 0; } + UInt64 getCurrentInitializationDurationMs() const; private: bool initializeMainThread() override; void initializeReplication(); @@ -59,7 +59,8 @@ private: /// It will remove "active" node when database is detached zkutil::EphemeralNodeHolderPtr active_node_holder; - std::optional initialization_duration_timer; + Stopwatch initialization_duration_timer; + std::atomic initializing = false; }; } From 86ff4e8b73d99daa5239104a0223271411949b4a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Tue, 23 Jul 2024 12:39:24 +0200 Subject: [PATCH 144/661] groupArrayIntersect: Fix serialization bug --- .../AggregateFunctionGroupArrayIntersect.cpp | 4 +- ...roupArrayIntersect_serialization.reference | 12 ++++++ ...3208_groupArrayIntersect_serialization.sql | 41 +++++++++++++++++++ 3 files changed, 55 insertions(+), 2 deletions(-) create mode 100644 tests/queries/0_stateless/03208_groupArrayIntersect_serialization.reference create mode 100644 tests/queries/0_stateless/03208_groupArrayIntersect_serialization.sql diff --git a/src/AggregateFunctions/AggregateFunctionGroupArrayIntersect.cpp b/src/AggregateFunctions/AggregateFunctionGroupArrayIntersect.cpp index 1529cd5899a..38f2fcb9fb9 100644 --- a/src/AggregateFunctions/AggregateFunctionGroupArrayIntersect.cpp +++ b/src/AggregateFunctions/AggregateFunctionGroupArrayIntersect.cpp @@ -47,7 +47,7 @@ struct AggregateFunctionGroupArrayIntersectData }; -/// Puts all values to the hash set. Returns an array of unique values. Implemented for numeric types. +/// Puts all values to the hash set. Returns an array of unique values present in all inputs. Implemented for numeric types. template class AggregateFunctionGroupArrayIntersect : public IAggregateFunctionDataHelper, AggregateFunctionGroupArrayIntersect> @@ -154,7 +154,7 @@ public: set.reserve(size); for (size_t i = 0; i < size; ++i) { - int key; + T key; readIntBinary(key, buf); set.insert(key); } diff --git a/tests/queries/0_stateless/03208_groupArrayIntersect_serialization.reference b/tests/queries/0_stateless/03208_groupArrayIntersect_serialization.reference new file mode 100644 index 00000000000..c3b6e0cd5b7 --- /dev/null +++ b/tests/queries/0_stateless/03208_groupArrayIntersect_serialization.reference @@ -0,0 +1,12 @@ +010101 AggregateFunction(groupArrayIntersect, Array(UInt8)) +[1] +1 [2,4,6,8,10] +2 [2,4,6,8,10] +3 [2,4,6,8,10] +5 [2,6,10] +6 [10] +7 [] +a [(['2','4','6','8','10'])] +b [(['2','4','6','8','10'])] +c [(['2','4','6','8','10'])] +d [] diff --git a/tests/queries/0_stateless/03208_groupArrayIntersect_serialization.sql b/tests/queries/0_stateless/03208_groupArrayIntersect_serialization.sql new file mode 100644 index 00000000000..e05f78a4051 --- /dev/null +++ b/tests/queries/0_stateless/03208_groupArrayIntersect_serialization.sql @@ -0,0 +1,41 @@ +SELECT hex(groupArrayIntersectState([1]) AS a), toTypeName(a); +SELECT finalizeAggregation(CAST(unhex('010101'), 'AggregateFunction(groupArrayIntersect, Array(UInt8))')); + +DROP TABLE IF EXISTS grouparray; +CREATE TABLE grouparray +( + `v` AggregateFunction(groupArrayIntersect, Array(UInt8)) +) +ENGINE = Log; + +INSERT INTO grouparray Select groupArrayIntersectState([2, 4, 6, 8, 10]::Array(UInt8)); +SELECT '1', arraySort(groupArrayIntersectMerge(v)) FROM grouparray; +INSERT INTO grouparray Select groupArrayIntersectState([2, 4, 6, 8, 10]::Array(UInt8)); +SELECT '2', arraySort(groupArrayIntersectMerge(v)) FROM grouparray; +INSERT INTO grouparray Select 
groupArrayIntersectState([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]::Array(UInt8)); +SELECT '3', arraySort(groupArrayIntersectMerge(v)) FROM grouparray; +INSERT INTO grouparray Select groupArrayIntersectState([2, 6, 10]::Array(UInt8)); +SELECT '5', arraySort(groupArrayIntersectMerge(v)) FROM grouparray; +INSERT INTO grouparray Select groupArrayIntersectState([10]::Array(UInt8)); +SELECT '6', arraySort(groupArrayIntersectMerge(v)) FROM grouparray; +INSERT INTO grouparray Select groupArrayIntersectState([]::Array(UInt8)); +SELECT '7', arraySort(groupArrayIntersectMerge(v)) FROM grouparray; + +DROP TABLE IF EXISTS grouparray; + + +DROP TABLE IF EXISTS grouparray_string; +CREATE TABLE grouparray_string +( + `v` AggregateFunction(groupArrayIntersect, Array(Tuple(Array(String)))) +) +ENGINE = Log; + +INSERT INTO grouparray_string Select groupArrayIntersectState([tuple(['2', '4', '6', '8', '10'])]); +SELECT 'a', arraySort(groupArrayIntersectMerge(v)) FROM grouparray_string; +INSERT INTO grouparray_string Select groupArrayIntersectState([tuple(['2', '4', '6', '8', '10']), tuple(['2', '4', '6', '8', '10'])]); +SELECT 'b', arraySort(groupArrayIntersectMerge(v)) FROM grouparray_string; +INSERT INTO grouparray_string Select groupArrayIntersectState([tuple(['2', '4', '6', '8', '10']), tuple(['2', '4', '6', '8', '10', '14'])]); +SELECT 'c', arraySort(groupArrayIntersectMerge(v)) FROM grouparray_string; +INSERT INTO grouparray_string Select groupArrayIntersectState([tuple(['2', '4', '6', '8', '10', '20']), tuple(['2', '4', '6', '8', '10', '14'])]); +SELECT 'd', arraySort(groupArrayIntersectMerge(v)) FROM grouparray_string; From 0256dba672bd23302b845f7d78f4663e3c633140 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Tue, 23 Jul 2024 13:20:41 +0200 Subject: [PATCH 145/661] Make 02987_group_array_intersect smaller --- .../0_stateless/02987_group_array_intersect.reference | 8 ++++---- .../0_stateless/02987_group_array_intersect.sql | 10 +++++----- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/tests/queries/0_stateless/02987_group_array_intersect.reference b/tests/queries/0_stateless/02987_group_array_intersect.reference index 7ec64a889f5..ec4d07742af 100644 --- a/tests/queries/0_stateless/02987_group_array_intersect.reference +++ b/tests/queries/0_stateless/02987_group_array_intersect.reference @@ -8,12 +8,12 @@ [1,4,5] [] [] -1000000 -999999 +100000 +99999 [9] ['a','c'] -1000000 -999999 +50000 +49999 ['1'] [] ['2023-01-01 00:00:00'] diff --git a/tests/queries/0_stateless/02987_group_array_intersect.sql b/tests/queries/0_stateless/02987_group_array_intersect.sql index 321e860b0a8..15acd0ca900 100644 --- a/tests/queries/0_stateless/02987_group_array_intersect.sql +++ b/tests/queries/0_stateless/02987_group_array_intersect.sql @@ -39,15 +39,15 @@ DROP TABLE test_numbers; DROP TABLE IF EXISTS test_big_numbers_sep; CREATE TABLE test_big_numbers_sep (a Array(Int64)) engine=MergeTree ORDER BY a; -INSERT INTO test_big_numbers_sep SELECT array(number) FROM numbers_mt(1000000); +INSERT INTO test_big_numbers_sep SELECT array(number) FROM numbers_mt(100000); SELECT groupArrayIntersect(*) FROM test_big_numbers_sep; DROP TABLE test_big_numbers_sep; DROP TABLE IF EXISTS test_big_numbers; CREATE TABLE test_big_numbers (a Array(Int64)) engine=MergeTree ORDER BY a; -INSERT INTO test_big_numbers SELECT range(1000000); +INSERT INTO test_big_numbers SELECT range(100000); SELECT length(groupArrayIntersect(*)) FROM test_big_numbers; -INSERT INTO test_big_numbers SELECT range(999999); +INSERT INTO 
test_big_numbers SELECT range(99999); SELECT length(groupArrayIntersect(*)) FROM test_big_numbers; INSERT INTO test_big_numbers VALUES ([9]); SELECT groupArrayIntersect(*) FROM test_big_numbers; @@ -63,9 +63,9 @@ DROP TABLE test_string; DROP TABLE IF EXISTS test_big_string; CREATE TABLE test_big_string (a Array(String)) engine=MergeTree ORDER BY a; -INSERT INTO test_big_string SELECT groupArray(toString(number)) FROM numbers_mt(1000000); +INSERT INTO test_big_string SELECT groupArray(toString(number)) FROM numbers_mt(50000); SELECT length(groupArrayIntersect(*)) FROM test_big_string; -INSERT INTO test_big_string SELECT groupArray(toString(number)) FROM numbers_mt(999999); +INSERT INTO test_big_string SELECT groupArray(toString(number)) FROM numbers_mt(49999); SELECT length(groupArrayIntersect(*)) FROM test_big_string; INSERT INTO test_big_string VALUES (['1']); SELECT groupArrayIntersect(*) FROM test_big_string; From aaf603035e31874d6d5bcd024d0f4040715baa72 Mon Sep 17 00:00:00 2001 From: Michael Stetsyuk Date: Tue, 23 Jul 2024 13:35:37 +0100 Subject: [PATCH 146/661] check error type --- src/IO/S3/Client.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/IO/S3/Client.cpp b/src/IO/S3/Client.cpp index 325c820f8bd..7196dfa9bdc 100644 --- a/src/IO/S3/Client.cpp +++ b/src/IO/S3/Client.cpp @@ -385,7 +385,7 @@ Model::HeadObjectOutcome Client::HeadObject(HeadObjectRequest & request) const request.overrideURI(std::move(*bucket_uri)); - if (isClientForDisk()) + if (isClientForDisk() && error.GetErrorType() == Aws::S3::S3Errors::NO_SUCH_KEY) CurrentMetrics::add(CurrentMetrics::S3DiskNoKeyErrors); return enrichErrorMessage( @@ -410,7 +410,7 @@ Model::ListObjectsOutcome Client::ListObjects(ListObjectsRequest & request) cons Model::GetObjectOutcome Client::GetObject(GetObjectRequest & request) const { auto resp = doRequest(request, [this](const Model::GetObjectRequest & req) { return GetObject(req); }); - if (!resp.IsSuccess() && isClientForDisk()) + if (!resp.IsSuccess() && isClientForDisk() && resp.GetError().GetErrorType() == Aws::S3::S3Errors::NO_SUCH_KEY) CurrentMetrics::add(CurrentMetrics::S3DiskNoKeyErrors); return enrichErrorMessage(std::move(resp)); From d74dc587d7a183225b7cf0846b85e8213dcb7fc0 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Tue, 23 Jul 2024 13:06:58 +0000 Subject: [PATCH 147/661] Fix comment --- src/Processors/IProcessor.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Processors/IProcessor.h b/src/Processors/IProcessor.h index 94e93595f4e..4fd00d5e164 100644 --- a/src/Processors/IProcessor.h +++ b/src/Processors/IProcessor.h @@ -234,7 +234,7 @@ public: * Example, scheduling tasks for remote workers (file descriptor in this case is a socket) * When the remote worker asks for the next task, doing it in onAsyncJobReady() we can provide it immediately. * Otherwise, the returning of the next task for the remote worker can be delayed by current work done in the pipeline - * i.e. 
processor->work(), which will create unnecessary latency in query processing by remote workers Not Committed Yet + * (by other processors), which will create unnecessary latency in query processing by remote workers */ virtual void onAsyncJobReady() {} From 9d55553225c4c5e253e32fb0de9944a2e29b7bcf Mon Sep 17 00:00:00 2001 From: vdimir Date: Tue, 23 Jul 2024 18:52:50 +0000 Subject: [PATCH 148/661] Forbid create as select even when database_replicated_allow_heavy_create is set --- src/Interpreters/InterpreterCreateQuery.cpp | 16 ++++++++++++---- ...ed_database_forbid_create_as_select.reference | 2 ++ ...eplicated_database_forbid_create_as_select.sh | 8 ++++++-- 3 files changed, 20 insertions(+), 6 deletions(-) diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index ea10ad59db4..2f837fe4d2b 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -1329,8 +1329,8 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create) if (need_add_to_database) database = DatabaseCatalog::instance().tryGetDatabase(database_name); - bool allow_heavy_create = getContext()->getSettingsRef().database_replicated_allow_heavy_create; - if (!allow_heavy_create && database && database->getEngineName() == "Replicated" && (create.select || create.is_populate)) + bool allow_heavy_populate = getContext()->getSettingsRef().database_replicated_allow_heavy_create && create.is_populate; + if (!allow_heavy_populate && database && database->getEngineName() == "Replicated" && (create.select || create.is_populate)) { bool is_storage_replicated = false; if (create.storage && create.storage->engine) @@ -1342,10 +1342,18 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create) const bool allow_create_select_for_replicated = (create.isView() && !create.is_populate) || create.is_create_empty || !is_storage_replicated; if (!allow_create_select_for_replicated) + { + /// POPULATE can be enabled with setting, provide hint in error message + if (create.is_populate) + throw Exception( + ErrorCodes::SUPPORT_IS_DISABLED, + "CREATE with POPULATE is not supported with Replicated databases. Consider using separate CREATE and INSERT queries. " + "Alternatively, you can enable 'database_replicated_allow_heavy_create' setting to allow this operation, use with caution"); + throw Exception( ErrorCodes::SUPPORT_IS_DISABLED, - "CREATE AS SELECT and POPULATE is not supported with Replicated databases. Consider using separate CREATE and INSERT queries. " - "Alternatively, you can enable 'database_replicated_allow_heavy_create' setting to allow this operation, use with caution"); + "CREATE AS SELECT is not supported with Replicated databases. 
Consider using separate CREATE and INSERT queries."); + } } if (database && database->shouldReplicateQuery(getContext(), query_ptr)) diff --git a/tests/queries/0_stateless/02933_replicated_database_forbid_create_as_select.reference b/tests/queries/0_stateless/02933_replicated_database_forbid_create_as_select.reference index 6ed281c757a..98fb6a68656 100644 --- a/tests/queries/0_stateless/02933_replicated_database_forbid_create_as_select.reference +++ b/tests/queries/0_stateless/02933_replicated_database_forbid_create_as_select.reference @@ -1,2 +1,4 @@ 1 1 +1 +1 diff --git a/tests/queries/0_stateless/02933_replicated_database_forbid_create_as_select.sh b/tests/queries/0_stateless/02933_replicated_database_forbid_create_as_select.sh index 15f169d880f..b587549cb60 100755 --- a/tests/queries/0_stateless/02933_replicated_database_forbid_create_as_select.sh +++ b/tests/queries/0_stateless/02933_replicated_database_forbid_create_as_select.sh @@ -18,8 +18,12 @@ ${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --query "CREATE MATERIAL ${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --query "CREATE TABLE ${CLICKHOUSE_DATABASE}_db.test2 (id UInt64) ENGINE = ReplicatedMergeTree ORDER BY id AS SELECT 1" |& grep -cm1 "SUPPORT_IS_DISABLED" ${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --query "CREATE MATERIALIZED VIEW ${CLICKHOUSE_DATABASE}_db.test_mv2 (id UInt64) ENGINE = ReplicatedMergeTree ORDER BY id POPULATE AS SELECT 1" |& grep -cm1 "SUPPORT_IS_DISABLED" -# But it is allowed with the special setting -${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --query "CREATE TABLE ${CLICKHOUSE_DATABASE}_db.test2 (id UInt64) ENGINE = ReplicatedMergeTree ORDER BY id AS SELECT 1" --database_replicated_allow_heavy_create=1 +# POPULATE is allowed with the special setting ${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --query "CREATE MATERIALIZED VIEW ${CLICKHOUSE_DATABASE}_db.test_mv2 (id UInt64) ENGINE = ReplicatedMergeTree ORDER BY id POPULATE AS SELECT 1" --database_replicated_allow_heavy_create=1 +${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --query "CREATE MATERIALIZED VIEW ${CLICKHOUSE_DATABASE}_db.test_mv3 (id UInt64) ENGINE = ReplicatedMergeTree ORDER BY id POPULATE AS SELECT 1" --compatibility='24.6' + +# AS SELECT is forbidden even with the setting +${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --query "CREATE TABLE ${CLICKHOUSE_DATABASE}_db.test2 (id UInt64) ENGINE = ReplicatedMergeTree ORDER BY id AS SELECT 1" --database_replicated_allow_heavy_create=1 |& grep -cm1 "SUPPORT_IS_DISABLED" +${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --query "CREATE TABLE ${CLICKHOUSE_DATABASE}_db.test2 (id UInt64) ENGINE = ReplicatedMergeTree ORDER BY id AS SELECT 1" --compatibility='24.6' |& grep -cm1 "SUPPORT_IS_DISABLED" ${CLICKHOUSE_CLIENT} --query "DROP DATABASE ${CLICKHOUSE_DATABASE}_db" From c850fac65276342e0b8694fa00d44dd3269d1abc Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 24 Jul 2024 09:05:06 +0200 Subject: [PATCH 149/661] Fix error --- src/Parsers/MySQL/tests/gtest_column_parser.cpp | 11 ++++++----- src/Parsers/ParserCreateQuery.h | 7 ++++--- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/src/Parsers/MySQL/tests/gtest_column_parser.cpp b/src/Parsers/MySQL/tests/gtest_column_parser.cpp index 21c37e4ee2e..3a9a0690f06 100644 --- a/src/Parsers/MySQL/tests/gtest_column_parser.cpp +++ b/src/Parsers/MySQL/tests/gtest_column_parser.cpp @@ -1,13 +1,14 @@ #include #include #include -#include +#include #include 
#include #include #include #include + using namespace DB; using namespace DB::MySQLParser; @@ -19,8 +20,8 @@ TEST(ParserColumn, AllNonGeneratedColumnOption) "COLUMN_FORMAT FIXED STORAGE MEMORY REFERENCES tbl_name (col_01) CHECK 1"; ASTPtr ast = parseQuery(p_column, input.data(), input.data() + input.size(), "", 0, 0, 0); EXPECT_EQ(ast->as()->name, "col_01"); - EXPECT_EQ(ast->as()->data_type->as()->name, "VARCHAR"); - EXPECT_EQ(ast->as()->data_type->as()->arguments->children[0]->as()->value.safeGet(), 100); + EXPECT_EQ(ast->as()->data_type->as()->name, "VARCHAR"); + EXPECT_EQ(ast->as()->data_type->as()->arguments->children[0]->as()->value.safeGet(), 100); ASTDeclareOptions * declare_options = ast->as()->column_options->as(); EXPECT_EQ(declare_options->changes["is_null"]->as()->value.safeGet(), 0); @@ -44,8 +45,8 @@ TEST(ParserColumn, AllGeneratedColumnOption) "REFERENCES tbl_name (col_01) CHECK 1 GENERATED ALWAYS AS (1) STORED"; ASTPtr ast = parseQuery(p_column, input.data(), input.data() + input.size(), "", 0, 0, 0); EXPECT_EQ(ast->as()->name, "col_01"); - EXPECT_EQ(ast->as()->data_type->as()->name, "VARCHAR"); - EXPECT_EQ(ast->as()->data_type->as()->arguments->children[0]->as()->value.safeGet(), 100); + EXPECT_EQ(ast->as()->data_type->as()->name, "VARCHAR"); + EXPECT_EQ(ast->as()->data_type->as()->arguments->children[0]->as()->value.safeGet(), 100); ASTDeclareOptions * declare_options = ast->as()->column_options->as(); EXPECT_EQ(declare_options->changes["is_null"]->as()->value.safeGet(), 1); diff --git a/src/Parsers/ParserCreateQuery.h b/src/Parsers/ParserCreateQuery.h index 7bd1d1bf588..53a62deb22b 100644 --- a/src/Parsers/ParserCreateQuery.h +++ b/src/Parsers/ParserCreateQuery.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include #include @@ -13,6 +14,7 @@ #include #include + namespace DB { @@ -268,9 +270,8 @@ bool IParserColumnDeclaration::parseImpl(Pos & pos, ASTPtr & node, E auto default_function = std::make_shared(); default_function->name = "defaultValueOfTypeName"; default_function->arguments = std::make_shared(); - // Ephemeral columns don't really have secrets but we need to format - // into a String, hence the strange call - default_function->arguments->children.emplace_back(std::make_shared(type->as()->formatForLogging())); + /// Ephemeral columns don't really have secrets but we need to format into a String, hence the strange call + default_function->arguments->children.emplace_back(std::make_shared(type->as()->formatForLogging())); default_expression = default_function; } From c3204fb89577e50ec7ef2c7ddd3c62f913e084f2 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 24 Jul 2024 09:25:22 +0200 Subject: [PATCH 150/661] Fix error --- src/Parsers/ExpressionElementParsers.cpp | 3 +-- src/Parsers/ExpressionListParsers.cpp | 17 +++++++++++++++++ src/Parsers/ExpressionListParsers.h | 10 ++++++++++ src/Parsers/ParserCreateIndexQuery.cpp | 2 +- src/Parsers/ParserCreateQuery.cpp | 4 ++-- 5 files changed, 31 insertions(+), 5 deletions(-) diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp index d4fc9a4bc4d..865d07faaa7 100644 --- a/src/Parsers/ExpressionElementParsers.cpp +++ b/src/Parsers/ExpressionElementParsers.cpp @@ -9,8 +9,8 @@ #include #include #include -#include "Parsers/CommonParsers.h" +#include #include #include #include @@ -725,7 +725,6 @@ bool ParserStatisticsType::parseImpl(Pos & pos, ASTPtr & node, Expected & expect function_node->name = "STATISTICS"; function_node->arguments = stat_type; 
function_node->children.push_back(function_node->arguments); - node = function_node; return true; } diff --git a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp index f97c042e91e..66817fafa5e 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -2388,6 +2388,23 @@ bool ParserFunction::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) } } +bool ParserExpressionWithOptionalArguments::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + ParserIdentifier id_p; + ParserFunction func_p; + + if (ParserFunction(false, false).parse(pos, node, expected)) + return true; + + if (ParserIdentifier().parse(pos, node, expected)) + { + node = makeASTFunction(node->as()->name()); + return true; + } + + return false; +} + const std::vector> ParserExpressionImpl::operators_table { {"->", Operator("lambda", 1, 2, OperatorType::Lambda)}, diff --git a/src/Parsers/ExpressionListParsers.h b/src/Parsers/ExpressionListParsers.h index 235d5782630..6ab38416f32 100644 --- a/src/Parsers/ExpressionListParsers.h +++ b/src/Parsers/ExpressionListParsers.h @@ -144,6 +144,16 @@ protected: }; +/** Similar to ParserFunction (and yields ASTFunction), but can also parse identifiers without braces. + */ +class ParserExpressionWithOptionalArguments : public IParserBase +{ +protected: + const char * getName() const override { return "expression with optional parameters"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + + /** An expression with an infix binary left-associative operator. * For example, a + b - c + d. */ diff --git a/src/Parsers/ParserCreateIndexQuery.cpp b/src/Parsers/ParserCreateIndexQuery.cpp index 2761c99738b..b815ba60bab 100644 --- a/src/Parsers/ParserCreateIndexQuery.cpp +++ b/src/Parsers/ParserCreateIndexQuery.cpp @@ -21,7 +21,7 @@ bool ParserCreateIndexDeclaration::parseImpl(Pos & pos, ASTPtr & node, Expected ParserToken close_p(TokenType::ClosingRoundBracket); ParserOrderByExpressionList order_list_p; - ParserFunction type_p; + ParserExpressionWithOptionalArguments type_p; ParserExpression expression_p; ParserUnsignedInteger granularity_p; diff --git a/src/Parsers/ParserCreateQuery.cpp b/src/Parsers/ParserCreateQuery.cpp index 92c0e7b2558..5da6c3a2510 100644 --- a/src/Parsers/ParserCreateQuery.cpp +++ b/src/Parsers/ParserCreateQuery.cpp @@ -180,7 +180,7 @@ bool ParserIndexDeclaration::parseImpl(Pos & pos, ASTPtr & node, Expected & expe ParserKeyword s_granularity(Keyword::GRANULARITY); ParserIdentifier name_p; - ParserFunction type_p; + ParserExpressionWithOptionalArguments type_p; ParserExpression expression_p; ParserUnsignedInteger granularity_p; @@ -240,7 +240,7 @@ bool ParserStatisticsDeclaration::parseImpl(Pos & pos, ASTPtr & node, Expected & ParserKeyword s_type(Keyword::TYPE); ParserList columns_p(std::make_unique(), std::make_unique(TokenType::Comma), false); - ParserList types_p(std::make_unique(), std::make_unique(TokenType::Comma), false); + ParserList types_p(std::make_unique(), std::make_unique(TokenType::Comma), false); ASTPtr columns; ASTPtr types; From 5fe78d47bc855867f6431ad06e019b3e0278d0ae Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 24 Jul 2024 10:24:13 +0200 Subject: [PATCH 151/661] Compatibility --- src/Parsers/ExpressionListParsers.cpp | 1 + src/Parsers/ParserCreateIndexQuery.cpp | 10 +--------- src/Parsers/ParserCreateQuery.cpp | 10 +--------- 3 files changed, 3 insertions(+), 18 deletions(-) diff --git a/src/Parsers/ExpressionListParsers.cpp 
b/src/Parsers/ExpressionListParsers.cpp index 66817fafa5e..a9715cec81e 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -2399,6 +2399,7 @@ bool ParserExpressionWithOptionalArguments::parseImpl(Pos & pos, ASTPtr & node, if (ParserIdentifier().parse(pos, node, expected)) { node = makeASTFunction(node->as()->name()); + node->as().no_empty_args = true; return true; } diff --git a/src/Parsers/ParserCreateIndexQuery.cpp b/src/Parsers/ParserCreateIndexQuery.cpp index b815ba60bab..9ebee4cc852 100644 --- a/src/Parsers/ParserCreateIndexQuery.cpp +++ b/src/Parsers/ParserCreateIndexQuery.cpp @@ -69,15 +69,7 @@ bool ParserCreateIndexDeclaration::parseImpl(Pos & pos, ASTPtr & node, Expected if (s_type.ignore(pos, expected)) { if (!type_p.parse(pos, type, expected)) - { - if (ParserIdentifier().parse(pos, type, expected)) - { - type = makeASTFunction(type->as().name()); - type->as().no_empty_args = true; - } - else - return false; - } + return false; } if (s_granularity.ignore(pos, expected)) diff --git a/src/Parsers/ParserCreateQuery.cpp b/src/Parsers/ParserCreateQuery.cpp index 5da6c3a2510..bf5523152ac 100644 --- a/src/Parsers/ParserCreateQuery.cpp +++ b/src/Parsers/ParserCreateQuery.cpp @@ -199,15 +199,7 @@ bool ParserIndexDeclaration::parseImpl(Pos & pos, ASTPtr & node, Expected & expe return false; if (!type_p.parse(pos, type, expected)) - { - if (name_p.parse(pos, type, expected)) - { - type = makeASTFunction(type->as().name()); - type->as().no_empty_args = true; - } - else - return false; - } + return false; if (s_granularity.ignore(pos, expected)) { From e5bb485a006d93a9e00736dc37ad90a0a0a47673 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 24 Jul 2024 10:25:23 +0200 Subject: [PATCH 152/661] Compatibility --- src/Parsers/ExpressionListParsers.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp index a9715cec81e..d38dc6d5f37 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -2399,7 +2399,7 @@ bool ParserExpressionWithOptionalArguments::parseImpl(Pos & pos, ASTPtr & node, if (ParserIdentifier().parse(pos, node, expected)) { node = makeASTFunction(node->as()->name()); - node->as().no_empty_args = true; + node->as().no_empty_args = true; return true; } From 73fc5c266f3bc254db3882bfa2f9f42db6b2bc87 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 24 Jul 2024 10:37:19 +0200 Subject: [PATCH 153/661] Fix error --- src/DataTypes/DataTypeObject.cpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/DataTypes/DataTypeObject.cpp b/src/DataTypes/DataTypeObject.cpp index 5636a46373f..91b9bfcb2a5 100644 --- a/src/DataTypes/DataTypeObject.cpp +++ b/src/DataTypes/DataTypeObject.cpp @@ -4,9 +4,10 @@ #include #include -#include +#include #include + namespace DB { @@ -53,13 +54,13 @@ static DataTypePtr create(const ASTPtr & arguments) ASTPtr schema_argument = arguments->children[0]; bool is_nullable = false; - if (const auto * func = schema_argument->as()) + if (const auto * type = schema_argument->as()) { - if (func->name != "Nullable" || func->arguments->children.size() != 1) + if (type->name != "Nullable" || type->arguments->children.size() != 1) throw Exception(ErrorCodes::UNEXPECTED_AST_STRUCTURE, - "Expected 'Nullable()' as parameter for type Object (function: {})", func->name); + "Expected 'Nullable()' as parameter for type Object (function: {})", type->name); - 
schema_argument = func->arguments->children[0]; + schema_argument = type->arguments->children[0]; is_nullable = true; } From 57a6d281000f0a49116db82e8b0b364990e61970 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 24 Jul 2024 11:17:43 +0200 Subject: [PATCH 154/661] Fix error --- src/IO/ReadWriteBufferFromHTTP.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/IO/ReadWriteBufferFromHTTP.cpp b/src/IO/ReadWriteBufferFromHTTP.cpp index 85230957b3f..17a5ed385d4 100644 --- a/src/IO/ReadWriteBufferFromHTTP.cpp +++ b/src/IO/ReadWriteBufferFromHTTP.cpp @@ -132,6 +132,14 @@ std::optional ReadWriteBufferFromHTTP::tryGetFileSize() { return std::nullopt; } + catch (const NetException &) + { + return std::nullopt; + } + catch (const Poco::Net::NetException &) + { + return std::nullopt; + } } return file_info->file_size; From b25cad23ed3b90dc8c0903710dba0714bac7219c Mon Sep 17 00:00:00 2001 From: Pablo Marcos Date: Wed, 24 Jul 2024 11:42:28 +0000 Subject: [PATCH 155/661] Use unique names for tables and files --- .../integration/test_storage_s3_queue/test.py | 29 ++++++++++--------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/tests/integration/test_storage_s3_queue/test.py b/tests/integration/test_storage_s3_queue/test.py index bf3c28c5429..cf24e91f36b 100644 --- a/tests/integration/test_storage_s3_queue/test.py +++ b/tests/integration/test_storage_s3_queue/test.py @@ -7,6 +7,7 @@ import pytest from helpers.client import QueryRuntimeException from helpers.cluster import ClickHouseCluster, ClickHouseInstance import json +from uuid import uuid4 AVAILABLE_MODES = ["unordered", "ordered"] @@ -822,7 +823,7 @@ def test_multiple_tables_streaming_sync_distributed(started_cluster, mode): def test_max_set_age(started_cluster): node = started_cluster.instances["instance"] - table_name = f"max_set_age" + table_name = f"max_set_age_{uuid4().hex}" dst_table_name = f"{table_name}_dst" keeper_path = f"/clickhouse/test_{table_name}" files_path = f"{table_name}_data" @@ -847,11 +848,11 @@ def test_max_set_age(started_cluster): ) create_mv(node, table_name, dst_table_name) - total_values = generate_random_files( + _ = generate_random_files( started_cluster, files_path, files_to_generate, row_num=1 ) - expected_rows = 10 + expected_rows = files_to_generate node.wait_for_log_line("Checking node limits") node.wait_for_log_line("Node limits check finished") @@ -865,11 +866,11 @@ def test_max_set_age(started_cluster): time.sleep(1) assert expected_rows == get_count() - assert 10 == int(node.query(f"SELECT uniq(_path) from {dst_table_name}")) + assert files_to_generate == int(node.query(f"SELECT uniq(_path) from {dst_table_name}")) time.sleep(max_age + 5) - expected_rows = 20 + expected_rows *= 2 for _ in range(20): if expected_rows == get_count(): @@ -877,7 +878,7 @@ def test_max_set_age(started_cluster): time.sleep(1) assert expected_rows == get_count() - assert 10 == int(node.query(f"SELECT uniq(_path) from {dst_table_name}")) + assert files_to_generate == int(node.query(f"SELECT uniq(_path) from {dst_table_name}")) paths_count = [ int(x) @@ -885,7 +886,7 @@ def test_max_set_age(started_cluster): f"SELECT count() from {dst_table_name} GROUP BY _path" ).splitlines() ] - assert 10 == len(paths_count) + assert files_to_generate == len(paths_count) for path_count in paths_count: assert 2 == path_count @@ -901,7 +902,8 @@ def test_max_set_age(started_cluster): values_csv = ( "\n".join((",".join(map(str, row)) for row in values)) + "\n" ).encode() - put_s3_file_content(started_cluster, 
f"{files_path}/fff.csv", values_csv) + file_with_error = f"fff_{uuid4().hex}.csv" + put_s3_file_content(started_cluster, f"{files_path}/{file_with_error}", values_csv) for _ in range(30): if failed_count + 1 == int( @@ -920,16 +922,17 @@ def test_max_set_age(started_cluster): node.query("SYSTEM FLUSH LOGS") assert "Cannot parse input" in node.query( - "SELECT exception FROM system.s3queue WHERE file_name ilike '%fff.csv'" + f"SELECT exception FROM system.s3queue WHERE file_name ilike '%{file_with_error}'" ) + assert 1 == int( node.query( - "SELECT count() FROM system.s3queue_log WHERE file_name ilike '%fff.csv'" + f"SELECT count() FROM system.s3queue_log WHERE file_name ilike '%{file_with_error}'" ) ) assert 1 == int( node.query( - "SELECT count() FROM system.s3queue_log WHERE file_name ilike '%fff.csv' AND notEmpty(exception)" + f"SELECT count() FROM system.s3queue_log WHERE file_name ilike '%{file_with_error}' AND notEmpty(exception)" ) ) @@ -943,11 +946,11 @@ def test_max_set_age(started_cluster): node.query("SYSTEM FLUSH LOGS") assert "Cannot parse input" in node.query( - "SELECT exception FROM system.s3queue WHERE file_name ilike '%fff.csv' ORDER BY processing_end_time DESC LIMIT 1" + f"SELECT exception FROM system.s3queue WHERE file_name ilike '%{file_with_error}' ORDER BY processing_end_time DESC LIMIT 1" ) assert 1 < int( node.query( - "SELECT count() FROM system.s3queue_log WHERE file_name ilike '%fff.csv' AND notEmpty(exception)" + f"SELECT count() FROM system.s3queue_log WHERE file_name ilike '%{file_with_error}' AND notEmpty(exception)" ) ) From 91b7001df6e827f801bd792e7bd9d96cdd947946 Mon Sep 17 00:00:00 2001 From: Pablo Marcos Date: Wed, 24 Jul 2024 12:08:31 +0000 Subject: [PATCH 156/661] Refactor test to improve it - Create wait_for_condition that checks greedily for a period of time - Remove redundant checks - Allow other tests running in parallel to have `ObjectStorageQueueFailedFiles` errors --- .../integration/test_storage_s3_queue/test.py | 56 ++++++------------- 1 file changed, 16 insertions(+), 40 deletions(-) diff --git a/tests/integration/test_storage_s3_queue/test.py b/tests/integration/test_storage_s3_queue/test.py index cf24e91f36b..e178b3b6608 100644 --- a/tests/integration/test_storage_s3_queue/test.py +++ b/tests/integration/test_storage_s3_queue/test.py @@ -860,24 +860,21 @@ def test_max_set_age(started_cluster): def get_count(): return int(node.query(f"SELECT count() FROM {dst_table_name}")) - for _ in range(20): - if expected_rows == get_count(): - break - time.sleep(1) + def wait_for_condition(check_function, max_wait_time=30): + before = time.time() + while time.time() - before < max_wait_time: + if check_function(): + return + time.sleep(0.1) + assert False - assert expected_rows == get_count() + wait_for_condition(lambda: get_count() == expected_rows) assert files_to_generate == int(node.query(f"SELECT uniq(_path) from {dst_table_name}")) time.sleep(max_age + 5) expected_rows *= 2 - - for _ in range(20): - if expected_rows == get_count(): - break - time.sleep(1) - - assert expected_rows == get_count() + wait_for_condition(lambda: get_count() == expected_rows) assert files_to_generate == int(node.query(f"SELECT uniq(_path) from {dst_table_name}")) paths_count = [ @@ -890,11 +887,12 @@ def test_max_set_age(started_cluster): for path_count in paths_count: assert 2 == path_count - failed_count = int( - node.query( + def get_object_storage_failures(): + return int(node.query( "SELECT value FROM system.events WHERE name = 'ObjectStorageQueueFailedFiles' 
SETTINGS system_events_show_zero_values=1" - ) - ) + )) + + failed_count = get_object_storage_failures() values = [ ["failed", 1, 1], @@ -905,31 +903,13 @@ def test_max_set_age(started_cluster): file_with_error = f"fff_{uuid4().hex}.csv" put_s3_file_content(started_cluster, f"{files_path}/{file_with_error}", values_csv) - for _ in range(30): - if failed_count + 1 == int( - node.query( - "SELECT value FROM system.events WHERE name = 'ObjectStorageQueueFailedFiles' SETTINGS system_events_show_zero_values=1" - ) - ): - break - time.sleep(1) - - assert failed_count + 1 == int( - node.query( - "SELECT value FROM system.events WHERE name = 'ObjectStorageQueueFailedFiles' SETTINGS system_events_show_zero_values=1" - ) - ) + wait_for_condition(lambda: failed_count + 1 <= get_object_storage_failures()) node.query("SYSTEM FLUSH LOGS") assert "Cannot parse input" in node.query( f"SELECT exception FROM system.s3queue WHERE file_name ilike '%{file_with_error}'" ) - assert 1 == int( - node.query( - f"SELECT count() FROM system.s3queue_log WHERE file_name ilike '%{file_with_error}'" - ) - ) assert 1 == int( node.query( f"SELECT count() FROM system.s3queue_log WHERE file_name ilike '%{file_with_error}' AND notEmpty(exception)" @@ -938,11 +918,7 @@ def test_max_set_age(started_cluster): time.sleep(max_age + 1) - assert failed_count + 2 == int( - node.query( - "SELECT value FROM system.events WHERE name = 'ObjectStorageQueueFailedFiles'" - ) - ) + assert failed_count + 2 <= get_object_storage_failures() node.query("SYSTEM FLUSH LOGS") assert "Cannot parse input" in node.query( From 01ce22049a76995dc00974618c94af9ccbcc30db Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 24 Jul 2024 14:29:14 +0200 Subject: [PATCH 157/661] Fix tests --- src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp | 11 +++++------ .../MySQL/tests/gtest_create_rewritten.cpp | 6 ++---- .../test_postgresql_replica_database_engine_2/test.py | 2 +- 3 files changed, 8 insertions(+), 11 deletions(-) diff --git a/src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp b/src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp index f73965cfcc8..3917ffb8823 100644 --- a/src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp +++ b/src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp @@ -17,7 +17,6 @@ #include #include -#include #include #include #include @@ -158,7 +157,7 @@ static ColumnsDescription createColumnsDescription(const NamesAndTypesList & col /// (see git blame for details). 
auto column_name_and_type = columns_name_and_type.begin(); const auto * declare_column_ast = columns_definition->children.begin(); - for (; column_name_and_type != columns_name_and_type.end(); column_name_and_type++, declare_column_ast++) + for (; column_name_and_type != columns_name_and_type.end(); ++column_name_and_type, ++declare_column_ast) { const auto & declare_column = (*declare_column_ast)->as(); String comment; @@ -177,7 +176,7 @@ static ColumnsDescription createColumnsDescription(const NamesAndTypesList & col return columns_description; } -static NamesAndTypesList getNames(const ASTDataType & expr, ContextPtr context, const NamesAndTypesList & columns) +static NamesAndTypesList getNames(const ASTFunction & expr, ContextPtr context, const NamesAndTypesList & columns) { if (expr.arguments->children.empty()) return NamesAndTypesList{}; @@ -221,9 +220,9 @@ static std::tuplechildren.empty()) { diff --git a/src/Interpreters/MySQL/tests/gtest_create_rewritten.cpp b/src/Interpreters/MySQL/tests/gtest_create_rewritten.cpp index 6d6077a0295..81e6e6a8761 100644 --- a/src/Interpreters/MySQL/tests/gtest_create_rewritten.cpp +++ b/src/Interpreters/MySQL/tests/gtest_create_rewritten.cpp @@ -2,12 +2,10 @@ #include -#include #include #include #include #include -#include #include #include #include @@ -26,8 +24,8 @@ static inline ASTPtr tryRewrittenCreateQuery(const String & query, ContextPtr co context, "test_database", "test_database")[0]; } -static const char MATERIALIZEDMYSQL_TABLE_COLUMNS[] = ", `_sign` Int8() MATERIALIZED 1" - ", `_version` UInt64() MATERIALIZED 1" +static const char MATERIALIZEDMYSQL_TABLE_COLUMNS[] = ", `_sign` Int8 MATERIALIZED 1" + ", `_version` UInt64 MATERIALIZED 1" ", INDEX _version _version TYPE minmax GRANULARITY 1"; TEST(MySQLCreateRewritten, ColumnsDataType) diff --git a/tests/integration/test_postgresql_replica_database_engine_2/test.py b/tests/integration/test_postgresql_replica_database_engine_2/test.py index 5e04c9e4d12..406b50bc486 100644 --- a/tests/integration/test_postgresql_replica_database_engine_2/test.py +++ b/tests/integration/test_postgresql_replica_database_engine_2/test.py @@ -654,7 +654,7 @@ def test_table_override(started_cluster): instance.query(f"SELECT count() FROM {materialized_database}.{table_name}") ) - expected = "CREATE TABLE test_database.table_override\\n(\\n `key` Int32,\\n `value` String,\\n `_sign` Int8() MATERIALIZED 1,\\n `_version` UInt64() MATERIALIZED 1\\n)\\nENGINE = ReplacingMergeTree(_version)\\nPARTITION BY key\\nORDER BY tuple(key)" + expected = "CREATE TABLE test_database.table_override\\n(\\n `key` Int32,\\n `value` String,\\n `_sign` Int8 MATERIALIZED 1,\\n `_version` UInt64 MATERIALIZED 1\\n)\\nENGINE = ReplacingMergeTree(_version)\\nPARTITION BY key\\nORDER BY tuple(key)" assert ( expected == instance.query( From 6efd29144558ded7fb95b36c6c19ee50aee0071f Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 24 Jul 2024 14:37:05 +0200 Subject: [PATCH 158/661] Add a test --- .../03210_inconsistent_formatting_of_data_types.reference | 1 + .../03210_inconsistent_formatting_of_data_types.sh | 7 +++++++ 2 files changed, 8 insertions(+) create mode 100644 tests/queries/0_stateless/03210_inconsistent_formatting_of_data_types.reference create mode 100755 tests/queries/0_stateless/03210_inconsistent_formatting_of_data_types.sh diff --git a/tests/queries/0_stateless/03210_inconsistent_formatting_of_data_types.reference b/tests/queries/0_stateless/03210_inconsistent_formatting_of_data_types.reference new file mode 100644 index 
00000000000..ccb445a0573 --- /dev/null +++ b/tests/queries/0_stateless/03210_inconsistent_formatting_of_data_types.reference @@ -0,0 +1 @@ +ALTER TABLE columns_with_multiple_streams MODIFY COLUMN `field1` Nullable(tupleElement(x, 2), UInt8) diff --git a/tests/queries/0_stateless/03210_inconsistent_formatting_of_data_types.sh b/tests/queries/0_stateless/03210_inconsistent_formatting_of_data_types.sh new file mode 100755 index 00000000000..6cb2d083d71 --- /dev/null +++ b/tests/queries/0_stateless/03210_inconsistent_formatting_of_data_types.sh @@ -0,0 +1,7 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +$CLICKHOUSE_FORMAT --oneline --query "ALTER TABLE columns_with_multiple_streams MODIFY COLUMN field1 Nullable(tupleElement(x, 2), UInt8)" | $CLICKHOUSE_FORMAT --oneline From 37c345bb4925095da3e82e3fc3ed27072786d7e7 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Wed, 24 Jul 2024 16:01:21 +0200 Subject: [PATCH 159/661] rewrite 01171 test --- ..._mv_select_insert_isolation_long.reference | 4 - .../01171_mv_select_insert_isolation_long.sh | 229 ++++++++++++------ 2 files changed, 152 insertions(+), 81 deletions(-) diff --git a/tests/queries/0_stateless/01171_mv_select_insert_isolation_long.reference b/tests/queries/0_stateless/01171_mv_select_insert_isolation_long.reference index d8bb9e310e6..e69de29bb2d 100644 --- a/tests/queries/0_stateless/01171_mv_select_insert_isolation_long.reference +++ b/tests/queries/0_stateless/01171_mv_select_insert_isolation_long.reference @@ -1,4 +0,0 @@ -275 0 138 136 0 -275 0 -275 0 138 136 0 -275 0 diff --git a/tests/queries/0_stateless/01171_mv_select_insert_isolation_long.sh b/tests/queries/0_stateless/01171_mv_select_insert_isolation_long.sh index 2ab7f883367..f6850864be5 100755 --- a/tests/queries/0_stateless/01171_mv_select_insert_isolation_long.sh +++ b/tests/queries/0_stateless/01171_mv_select_insert_isolation_long.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, no-ordinary-database, no-debug +# Tags: long, no-ordinary-database # Test is too heavy, avoid parallel run in Flaky Check # shellcheck disable=SC2119 @@ -7,82 +7,125 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh -set -e +set -ue $CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS src"; $CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS dst"; $CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS mv"; $CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS tmp"; -$CLICKHOUSE_CLIENT --query "CREATE TABLE src (n Int8, m Int8, CONSTRAINT c CHECK xxHash32(n+m) % 8 != 0) ENGINE=MergeTree ORDER BY n PARTITION BY 0 < n SETTINGS old_parts_lifetime=0"; -$CLICKHOUSE_CLIENT --query "CREATE TABLE dst (nm Int16, CONSTRAINT c CHECK xxHash32(nm) % 8 != 0) ENGINE=MergeTree ORDER BY nm SETTINGS old_parts_lifetime=0"; -$CLICKHOUSE_CLIENT --query "CREATE MATERIALIZED VIEW mv TO dst (nm Int16) AS SELECT n*m AS nm FROM src"; -$CLICKHOUSE_CLIENT --query "CREATE TABLE tmp (x UInt8, nm Int16) ENGINE=MergeTree ORDER BY (x, nm) SETTINGS old_parts_lifetime=0" +$CLICKHOUSE_CLIENT --query "CREATE TABLE src (n Int32, m Int32, CONSTRAINT c CHECK xxHash32(n+m) % 8 != 0) ENGINE=MergeTree ORDER BY n PARTITION BY 0 < n SETTINGS old_parts_lifetime=0"; +$CLICKHOUSE_CLIENT --query "CREATE TABLE dst (nm Int32, CONSTRAINT c CHECK xxHash32(nm) % 8 != 0) ENGINE=MergeTree ORDER BY nm SETTINGS old_parts_lifetime=0"; +$CLICKHOUSE_CLIENT --query "CREATE MATERIALIZED VIEW mv TO dst (nm Int32) AS SELECT n*m AS nm FROM src"; + +$CLICKHOUSE_CLIENT --query "CREATE TABLE tmp (x UInt32, nm Int32) ENGINE=MergeTree ORDER BY (x, nm) SETTINGS old_parts_lifetime=0" $CLICKHOUSE_CLIENT --query "INSERT INTO src VALUES (0, 0)" -# some transactions will fail due to constraint -function thread_insert_commit() +function get_now() { - set -e - for i in {1..100}; do - $CLICKHOUSE_CLIENT --multiquery --query " - BEGIN TRANSACTION; - INSERT INTO src VALUES /* ($i, $1) */ ($i, $1); - SELECT throwIf((SELECT sum(nm) FROM mv) != $(($i * $1))) FORMAT Null; - INSERT INTO src VALUES /* (-$i, $1) */ (-$i, $1); - COMMIT;" 2>&1| grep -Fv "is violated at row" | grep -Fv "Transaction is not in RUNNING state" | grep -F "Received from " ||: - done + date +%s } -function thread_insert_rollback() +is_pid_exist() +{ + local pid=$1 + ps -p $pid > /dev/null +} + +function run_until_deadline_and_at_least_times() { set -e - for _ in {1..100}; do - $CLICKHOUSE_CLIENT --multiquery --query " - BEGIN TRANSACTION; - INSERT INTO src VALUES /* (42, $1) */ (42, $1); - SELECT throwIf((SELECT count() FROM src WHERE n=42 AND m=$1) != 1) FORMAT Null; - ROLLBACK;" + + local deadline=$1; shift + local min_iterations=$1; shift + local function_to_run=$1; shift + + local started_time=$(get_now) + local i=0 + + while true + do + $function_to_run $i $@ + + [[ $(get_now) -lt $deadline ]] || break + + i=$(($i + 1)) done + + [[ $i -gt $min_iterations ]] || echo "$i/$min_iterations : not enough iterations of $function_to_run has been made from $started_time until $deadline" >&2 +} + +function insert_commit_action() +{ + set -e + + local i=$1; shift + local tag=$1; shift + + # some transactions will fail due to constraint + $CLICKHOUSE_CLIENT --multiquery --query " + BEGIN TRANSACTION; + INSERT INTO src VALUES /* ($i, $tag) */ ($i, $tag); + SELECT throwIf((SELECT sum(nm) FROM mv) != $(($i * $tag))) /* ($i, $tag) */ FORMAT Null; + INSERT INTO src VALUES /* (-$i, $tag) */ (-$i, $tag); + COMMIT; + " 2>&1 \ + | grep -Fv "is violated at row" | grep -Fv "Transaction is not in RUNNING state" | grep -F "Received from " ||: +} + + +function insert_rollback_action() +{ + set -e + + local i=$1; shift + local tag=$1; shift + + $CLICKHOUSE_CLIENT --multiquery --query " + BEGIN TRANSACTION; + INSERT INTO src 
VALUES /* (42, $tag) */ (42, $tag); + SELECT throwIf((SELECT count() FROM src WHERE n=42 AND m=$tag) != 1) FORMAT Null; + ROLLBACK;" } # make merges more aggressive -function thread_optimize() +function optimize_action() { set -e - while true; do - optimize_query="OPTIMIZE TABLE src" - partition_id=$(( RANDOM % 2 )) - if (( RANDOM % 2 )); then - optimize_query="OPTIMIZE TABLE dst" - partition_id="all" - fi - if (( RANDOM % 2 )); then - optimize_query="$optimize_query PARTITION ID '$partition_id'" - fi - if (( RANDOM % 2 )); then - optimize_query="$optimize_query FINAL" - fi - action="COMMIT" - if (( RANDOM % 4 )); then - action="ROLLBACK" - fi - $CLICKHOUSE_CLIENT --multiquery --query " + optimize_query="OPTIMIZE TABLE src" + partition_id=$(( RANDOM % 2 )) + if (( RANDOM % 2 )); then + optimize_query="OPTIMIZE TABLE dst" + partition_id="all" + fi + if (( RANDOM % 2 )); then + optimize_query="$optimize_query PARTITION ID '$partition_id'" + fi + if (( RANDOM % 2 )); then + optimize_query="$optimize_query FINAL" + fi + action="COMMIT" + if (( RANDOM % 4 )); then + action="ROLLBACK" + fi + + $CLICKHOUSE_CLIENT --multiquery --query " BEGIN TRANSACTION; - $optimize_query; + $optimize_query; $action; - " 2>&1| grep -Fv "already exists, but it will be deleted soon" | grep -F "Received from " ||: - sleep 0.$RANDOM; - done + " 2>&1 \ + | grep -Fv "already exists, but it will be deleted soon" | grep -F "Received from " ||: + + sleep 0.$RANDOM; } -function thread_select() +function select_action() { set -e - while true; do - $CLICKHOUSE_CLIENT --multiquery --query " + + $CLICKHOUSE_CLIENT --multiquery --query " BEGIN TRANSACTION; SELECT throwIf((SELECT (sum(n), count() % 2) FROM src) != (0, 1)) FORMAT Null; SELECT throwIf((SELECT (sum(nm), count() % 2) FROM mv) != (0, 1)) FORMAT Null; @@ -90,14 +133,13 @@ function thread_select() SELECT throwIf((SELECT arraySort(groupArray(nm)) FROM mv) != (SELECT arraySort(groupArray(nm)) FROM dst)) FORMAT Null; SELECT throwIf((SELECT arraySort(groupArray(nm)) FROM mv) != (SELECT arraySort(groupArray(n*m)) FROM src)) FORMAT Null; COMMIT;" - done } -function thread_select_insert() +function select_insert_action() { set -e - while true; do - $CLICKHOUSE_CLIENT --multiquery --query " + + $CLICKHOUSE_CLIENT --multiquery --query " BEGIN TRANSACTION; SELECT throwIf((SELECT count() FROM tmp) != 0) FORMAT Null; INSERT INTO tmp SELECT 1, n*m FROM src; @@ -110,36 +152,69 @@ function thread_select_insert() SELECT throwIf(1 != (SELECT countDistinct(arr) FROM (SELECT x, arraySort(groupArray(nm)) AS arr FROM tmp WHERE x!=4 GROUP BY x))) FORMAT Null; SELECT throwIf((SELECT count(), sum(nm) FROM tmp WHERE x=4) != (SELECT count(), sum(nm) FROM tmp WHERE x!=4)) FORMAT Null; ROLLBACK;" - done } -thread_insert_commit 1 & PID_1=$! -thread_insert_commit 2 & PID_2=$! -thread_insert_rollback 3 & PID_3=$! +MAIN_TIME_PART=400 +SECOND_TIME_PART=30 +WAIT_FINISH=60 +LAST_TIME_GAP=10 -thread_optimize & PID_4=$! -thread_select & PID_5=$! -thread_select_insert & PID_6=$! -sleep 0.$RANDOM; -thread_select & PID_7=$! -thread_select_insert & PID_8=$! 
+if [[ $((MAIN_TIME_PART + SECOND_TIME_PART + WAIT_FINISH + LAST_TIME_GAP)) -ge 600 ]]; then + echo "time sttings are wrong" 2>&1 + exit 1 +fi -wait $PID_1 && wait $PID_2 && wait $PID_3 -kill -TERM $PID_4 -kill -TERM $PID_5 -kill -TERM $PID_6 -kill -TERM $PID_7 -kill -TERM $PID_8 -wait -wait_for_queries_to_finish 40 +START_TIME=$(get_now) +STOP_TIME=$((START_TIME + MAIN_TIME_PART)) +SECOND_STOP_TIME=$((STOP_TIME + SECOND_TIME_PART)) +MIN_ITERATIONS=50 + +run_until_deadline_and_at_least_times $STOP_TIME $MIN_ITERATIONS insert_commit_action 1 & PID_1=$! +run_until_deadline_and_at_least_times $STOP_TIME $MIN_ITERATIONS insert_commit_action 2 & PID_2=$! +run_until_deadline_and_at_least_times $STOP_TIME $MIN_ITERATIONS insert_rollback_action 3 & PID_3=$! + +run_until_deadline_and_at_least_times $SECOND_STOP_TIME $MIN_ITERATIONS optimize_action & PID_4=$! +run_until_deadline_and_at_least_times $SECOND_STOP_TIME $MIN_ITERATIONS select_action & PID_5=$! +run_until_deadline_and_at_least_times $SECOND_STOP_TIME $MIN_ITERATIONS select_insert_action & PID_6=$! +sleep 0.$RANDOM +run_until_deadline_and_at_least_times $SECOND_STOP_TIME $MIN_ITERATIONS select_action & PID_7=$! +run_until_deadline_and_at_least_times $SECOND_STOP_TIME $MIN_ITERATIONS select_insert_action & PID_8=$! + +wait $PID_1 || echo "insert_commit_action has failed with status $?" 2>&1 +wait $PID_2 || echo "second insert_commit_action has failed with status $?" 2>&1 +wait $PID_3 || echo "insert_rollback_action has failed with status $?" 2>&1 + +is_pid_exist $PID_4 || echo "optimize_action is not running" 2>&1 +is_pid_exist $PID_5 || echo "select_action is not running" 2>&1 +is_pid_exist $PID_6 || echo "select_insert_action is not running" 2>&1 +is_pid_exist $PID_7 || echo "second select_action is not running" 2>&1 +is_pid_exist $PID_8 || echo "second select_insert_action is not running" 2>&1 + +wait $PID_4 || echo "optimize_action has failed with status $?" 2>&1 +wait $PID_5 || echo "select_action has failed with status $?" 2>&1 +wait $PID_6 || echo "select_insert_action has failed with status $?" 2>&1 +wait $PID_7 || echo "second select_action has failed with status $?" 2>&1 +wait $PID_8 || echo "second select_insert_action has failed with status $?" 
2>&1 + +wait_for_queries_to_finish $WAIT_FINISH $CLICKHOUSE_CLIENT --multiquery --query " -BEGIN TRANSACTION; -SELECT count(), sum(n), sum(m=1), sum(m=2), sum(m=3) FROM src; -SELECT count(), sum(nm) FROM mv"; + BEGIN TRANSACTION; + SELECT throwIf((SELECT (sum(n), count() % 2) FROM src) != (0, 1)) FORMAT Null; + SELECT throwIf((SELECT (sum(nm), count() % 2) FROM mv) != (0, 1)) FORMAT Null; + SELECT throwIf((SELECT (sum(nm), count() % 2) FROM dst) != (0, 1)) FORMAT Null; + SELECT throwIf((SELECT arraySort(groupArray(nm)) FROM mv) != (SELECT arraySort(groupArray(nm)) FROM dst)) FORMAT Null; + SELECT throwIf((SELECT arraySort(groupArray(nm)) FROM mv) != (SELECT arraySort(groupArray(n*m)) FROM src)) FORMAT Null; + COMMIT; +" -$CLICKHOUSE_CLIENT --query "SELECT count(), sum(n), sum(m=1), sum(m=2), sum(m=3) FROM src" -$CLICKHOUSE_CLIENT --query "SELECT count(), sum(nm) FROM mv" +$CLICKHOUSE_CLIENT --multiquery --query " + SELECT throwIf((SELECT (sum(n), count() % 2) FROM src) != (0, 1)) FORMAT Null; + SELECT throwIf((SELECT (sum(nm), count() % 2) FROM mv) != (0, 1)) FORMAT Null; + SELECT throwIf((SELECT (sum(nm), count() % 2) FROM dst) != (0, 1)) FORMAT Null; + SELECT throwIf((SELECT arraySort(groupArray(nm)) FROM mv) != (SELECT arraySort(groupArray(nm)) FROM dst)) FORMAT Null; + SELECT throwIf((SELECT arraySort(groupArray(nm)) FROM mv) != (SELECT arraySort(groupArray(n*m)) FROM src)) FORMAT Null; +" $CLICKHOUSE_CLIENT --query "DROP TABLE src"; $CLICKHOUSE_CLIENT --query "DROP TABLE dst"; From f03d4bb7d5d40203bba68c4f8958d584f27ae881 Mon Sep 17 00:00:00 2001 From: Pablo Marcos Date: Wed, 24 Jul 2024 15:07:53 +0000 Subject: [PATCH 160/661] Format with black --- .../integration/test_storage_s3_queue/test.py | 24 ++++++++++++------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/tests/integration/test_storage_s3_queue/test.py b/tests/integration/test_storage_s3_queue/test.py index e178b3b6608..4348857acd3 100644 --- a/tests/integration/test_storage_s3_queue/test.py +++ b/tests/integration/test_storage_s3_queue/test.py @@ -823,7 +823,7 @@ def test_multiple_tables_streaming_sync_distributed(started_cluster, mode): def test_max_set_age(started_cluster): node = started_cluster.instances["instance"] - table_name = f"max_set_age_{uuid4().hex}" + table_name = "max_set_age" dst_table_name = f"{table_name}_dst" keeper_path = f"/clickhouse/test_{table_name}" files_path = f"{table_name}_data" @@ -848,9 +848,7 @@ def test_max_set_age(started_cluster): ) create_mv(node, table_name, dst_table_name) - _ = generate_random_files( - started_cluster, files_path, files_to_generate, row_num=1 - ) + _ = generate_random_files(started_cluster, files_path, files_to_generate, row_num=1) expected_rows = files_to_generate @@ -869,13 +867,17 @@ def test_max_set_age(started_cluster): assert False wait_for_condition(lambda: get_count() == expected_rows) - assert files_to_generate == int(node.query(f"SELECT uniq(_path) from {dst_table_name}")) + assert files_to_generate == int( + node.query(f"SELECT uniq(_path) from {dst_table_name}") + ) time.sleep(max_age + 5) expected_rows *= 2 wait_for_condition(lambda: get_count() == expected_rows) - assert files_to_generate == int(node.query(f"SELECT uniq(_path) from {dst_table_name}")) + assert files_to_generate == int( + node.query(f"SELECT uniq(_path) from {dst_table_name}") + ) paths_count = [ int(x) @@ -888,9 +890,11 @@ def test_max_set_age(started_cluster): assert 2 == path_count def get_object_storage_failures(): - return int(node.query( - "SELECT value FROM 
system.events WHERE name = 'ObjectStorageQueueFailedFiles' SETTINGS system_events_show_zero_values=1" - )) + return int( + node.query( + "SELECT value FROM system.events WHERE name = 'ObjectStorageQueueFailedFiles' SETTINGS system_events_show_zero_values=1" + ) + ) failed_count = get_object_storage_failures() @@ -900,6 +904,8 @@ def test_max_set_age(started_cluster): values_csv = ( "\n".join((",".join(map(str, row)) for row in values)) + "\n" ).encode() + + # use a different filename for each test to allow running a bunch of them sequentially with --count file_with_error = f"fff_{uuid4().hex}.csv" put_s3_file_content(started_cluster, f"{files_path}/{file_with_error}", values_csv) From c3620391b0befaf30eea0eab9001cc98fd5eeecc Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Wed, 24 Jul 2024 17:48:07 +0200 Subject: [PATCH 161/661] fix style --- .../0_stateless/01171_mv_select_insert_isolation_long.sh | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/01171_mv_select_insert_isolation_long.sh b/tests/queries/0_stateless/01171_mv_select_insert_isolation_long.sh index f6850864be5..718017bca3d 100755 --- a/tests/queries/0_stateless/01171_mv_select_insert_isolation_long.sh +++ b/tests/queries/0_stateless/01171_mv_select_insert_isolation_long.sh @@ -41,12 +41,13 @@ function run_until_deadline_and_at_least_times() local min_iterations=$1; shift local function_to_run=$1; shift - local started_time=$(get_now) + local started_time + started_time=$(get_now) local i=0 while true do - $function_to_run $i $@ + $function_to_run $i "$@" [[ $(get_now) -lt $deadline ]] || break From 689e31b47e1c85f1ae9721b3928de658eaf9a6ff Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 24 Jul 2024 18:31:40 +0200 Subject: [PATCH 162/661] More tests --- ...03210_inconsistent_formatting_of_data_types.reference | 6 ++++++ .../03210_inconsistent_formatting_of_data_types.sh | 9 +++++++++ 2 files changed, 15 insertions(+) diff --git a/tests/queries/0_stateless/03210_inconsistent_formatting_of_data_types.reference b/tests/queries/0_stateless/03210_inconsistent_formatting_of_data_types.reference index ccb445a0573..836b526905a 100644 --- a/tests/queries/0_stateless/03210_inconsistent_formatting_of_data_types.reference +++ b/tests/queries/0_stateless/03210_inconsistent_formatting_of_data_types.reference @@ -1 +1,7 @@ ALTER TABLE columns_with_multiple_streams MODIFY COLUMN `field1` Nullable(tupleElement(x, 2), UInt8) +ALTER TABLE t_update_empty_nested ADD COLUMN `nested.arr2` Array(tuple('- ON NULL -', toLowCardinality(11), 11, 11, toLowCardinality(11), 11), UInt64) +ALTER TABLE t ADD COLUMN `x` Array(tuple(1), UInt8) +ALTER TABLE enum_alter_issue MODIFY COLUMN `a` Enum8(equals('one', timeSlots(timeSlots(arrayEnumerateDense(tuple('0.2147483646', toLowCardinality(toUInt128)), NULL), 4, 12.34, materialize(73), 2)), 1)) +ALTER TABLE t_sparse_mutations_3 MODIFY COLUMN `s` Tuple(Nullable(tupleElement(s, 1), UInt64), Nullable(UInt64), Nullable(UInt64), Nullable(UInt64), Nullable(String)) +Syntax error +Syntax error diff --git a/tests/queries/0_stateless/03210_inconsistent_formatting_of_data_types.sh b/tests/queries/0_stateless/03210_inconsistent_formatting_of_data_types.sh index 6cb2d083d71..86c7a5469ca 100755 --- a/tests/queries/0_stateless/03210_inconsistent_formatting_of_data_types.sh +++ b/tests/queries/0_stateless/03210_inconsistent_formatting_of_data_types.sh @@ -4,4 +4,13 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh +# Ensure that these (possibly incorrect) queries can at least be parsed back after formatting. $CLICKHOUSE_FORMAT --oneline --query "ALTER TABLE columns_with_multiple_streams MODIFY COLUMN field1 Nullable(tupleElement(x, 2), UInt8)" | $CLICKHOUSE_FORMAT --oneline +$CLICKHOUSE_FORMAT --oneline --query "ALTER TABLE t_update_empty_nested ADD COLUMN \`nested.arr2\` Array(tuple('- ON NULL -', toLowCardinality(11), 11, 11, toLowCardinality(11), 11), UInt64)" | $CLICKHOUSE_FORMAT --oneline +$CLICKHOUSE_FORMAT --oneline --query "ALTER TABLE t ADD COLUMN x Array((1), UInt8)" | $CLICKHOUSE_FORMAT --oneline +$CLICKHOUSE_FORMAT --oneline --query "ALTER TABLE enum_alter_issue (MODIFY COLUMN a Enum8(equals('one', timeSlots(timeSlots(arrayEnumerateDense(tuple('0.2147483646', toLowCardinality(toUInt128(12))), NULL), 4, 12.34, materialize(73), 2)), 1)))" | $CLICKHOUSE_FORMAT --oneline +$CLICKHOUSE_FORMAT --oneline --query "ALTER TABLE t_sparse_mutations_3 MODIFY COLUMN s Tuple(Nullable(tupleElement(s, 1), UInt64), Nullable(UInt64), Nullable(UInt64), Nullable(UInt64), Nullable(String))" | $CLICKHOUSE_FORMAT --oneline + +# These invalid queries don't parse and this is normal. +$CLICKHOUSE_FORMAT --oneline --query "ALTER TABLE alter_compression_codec1 MODIFY COLUMN alter_column CODEC((2 + ignore(1, toUInt128(materialize(2)), 2 + toNullable(toNullable(3))), 3), NONE)" 2>&1 | grep -o -F 'Syntax error' +$CLICKHOUSE_FORMAT --oneline --query "ALTER TABLE test_table ADD COLUMN \`array\` Array(('110', 3, toLowCardinality(3), 3, toNullable(3), toLowCardinality(toNullable(3)), 3), UInt8) DEFAULT [1, 2, 3]" 2>&1 | grep -o -F 'Syntax error' From 5d88f6fc8c4b8fcce4e7a5da073f5d11a86cd3cb Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Wed, 24 Jul 2024 19:32:20 +0200 Subject: [PATCH 163/661] fix MIN_ITERATIONS --- .../0_stateless/01171_mv_select_insert_isolation_long.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01171_mv_select_insert_isolation_long.sh b/tests/queries/0_stateless/01171_mv_select_insert_isolation_long.sh index 718017bca3d..d79ab27d8b2 100755 --- a/tests/queries/0_stateless/01171_mv_select_insert_isolation_long.sh +++ b/tests/queries/0_stateless/01171_mv_select_insert_isolation_long.sh @@ -168,7 +168,7 @@ fi START_TIME=$(get_now) STOP_TIME=$((START_TIME + MAIN_TIME_PART)) SECOND_STOP_TIME=$((STOP_TIME + SECOND_TIME_PART)) -MIN_ITERATIONS=50 +MIN_ITERATIONS=30 run_until_deadline_and_at_least_times $STOP_TIME $MIN_ITERATIONS insert_commit_action 1 & PID_1=$! run_until_deadline_and_at_least_times $STOP_TIME $MIN_ITERATIONS insert_commit_action 2 & PID_2=$! 
From 687c99e39a3ece073239517ffbcecf4612721995 Mon Sep 17 00:00:00 2001 From: jsc0218 Date: Wed, 24 Jul 2024 18:37:25 +0000 Subject: [PATCH 164/661] try to fix --- .../0_stateless/02680_mysql_ast_logical_err.sql | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/02680_mysql_ast_logical_err.sql b/tests/queries/0_stateless/02680_mysql_ast_logical_err.sql index bde91df83ca..78ce1b68b0d 100644 --- a/tests/queries/0_stateless/02680_mysql_ast_logical_err.sql +++ b/tests/queries/0_stateless/02680_mysql_ast_logical_err.sql @@ -1,4 +1,10 @@ CREATE TABLE foo (key UInt32, a String, b Int64, c String) ENGINE = TinyLog; -SELECT count() FROM mysql(mysql('127.0.0.1:9004', currentDatabase(), 'foo', 'default', ''), '127.0.0.1:9004', currentDatabase(), 'foo', '', ''); -- { serverError UNKNOWN_FUNCTION } -SELECT count() FROM mysql(mysql('127.0.0.1:9004', currentDatabase(), 'foo', 'default', '', SETTINGS connection_pool_size = 1), '127.0.0.1:9004', currentDatabase(), 'foo', '', ''); -- { serverError UNKNOWN_FUNCTION, UNSUPPORTED_METHOD } +SELECT count() FROM mysql( + mysql('127.0.0.1:9004', currentDatabase(), 'foo', 'default', ''), + '127.0.0.1:9004', currentDatabase(), 'foo', '', '', + SETTINGS connect_timeout = 100, connection_wait_timeout = 100, read_write_timeout = 300); -- { serverError UNKNOWN_FUNCTION } +SELECT count() FROM mysql( + mysql('127.0.0.1:9004', currentDatabase(), 'foo', 'default', '', SETTINGS connection_pool_size = 1), + '127.0.0.1:9004', currentDatabase(), 'foo', '', '', + SETTINGS connect_timeout = 100, connection_wait_timeout = 100, read_write_timeout = 300); -- { serverError UNKNOWN_FUNCTION, UNSUPPORTED_METHOD } From e4b50c18c2c1918905bf44a8e1183f0cddd5a811 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Wed, 24 Jul 2024 22:26:46 +0200 Subject: [PATCH 165/661] getauxval: Avoid crash under sanitizer re-exec due to high ASLR entropy --- base/glibc-compatibility/musl/getauxval.c | 38 +++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/base/glibc-compatibility/musl/getauxval.c b/base/glibc-compatibility/musl/getauxval.c index ea5cff9fc11..86f9a546ee4 100644 --- a/base/glibc-compatibility/musl/getauxval.c +++ b/base/glibc-compatibility/musl/getauxval.c @@ -75,6 +75,44 @@ unsigned long NO_SANITIZE_THREAD __getauxval_procfs(unsigned long type) } static unsigned long NO_SANITIZE_THREAD __auxv_init_procfs(unsigned long type) { +#if defined(__x86_64__) && defined(__has_feature) +# if __has_feature(memory_sanitizer) || __has_feature(thread_sanitizer) + /// Sanitizers are not compatible with high ASLR entropy, which is the default on modern Linux distributions, and + /// to workaround this limitation, TSAN and MSAN (couldn't see other sanitizers doing the same), re-exec the binary + /// without ASLR (see https://github.com/llvm/llvm-project/commit/0784b1eefa36d4acbb0dacd2d18796e26313b6c5) + + /// The problem we face is that, in order to re-exec, the sanitizer wants to use the original pathname in the call + /// and to get its value it uses getauxval (https://github.com/llvm/llvm-project/blob/20eff684203287828d6722fc860b9d3621429542/compiler-rt/lib/sanitizer_common/sanitizer_linux_libcdep.cpp#L985-L988). 
+ /// Since we provide getauxval ourselves (to minimize the version dependency on runtime glibc), we are the ones + // being called and we fail horribly: + /// + /// ==301455==ERROR: MemorySanitizer: SEGV on unknown address 0x2ffc6d721550 (pc 0x5622c1cc0073 bp 0x000000000003 sp 0x7ffc6d721530 T301455) + /// ==301455==The signal is caused by a WRITE memory access. + /// #0 0x5622c1cc0073 in __auxv_init_procfs ./ClickHouse/base/glibc-compatibility/musl/getauxval.c:129:5 + /// #1 0x5622c1cbffe9 in getauxval ./ClickHouse/base/glibc-compatibility/musl/getauxval.c:240:12 + /// #2 0x5622c0d7bfb4 in __sanitizer::ReExec() crtstuff.c + /// #3 0x5622c0df7bfc in __msan::InitShadowWithReExec(bool) crtstuff.c + /// #4 0x5622c0d95356 in __msan_init (./ClickHouse/build_msan/contrib/google-protobuf-cmake/protoc+0x256356) (BuildId: 6411d3c88b898ba3f7d49760555977d3e61f0741) + /// #5 0x5622c0dfe878 in msan.module_ctor main.cc + /// #6 0x5622c1cc156c in __libc_csu_init (./ClickHouse/build_msan/contrib/google-protobuf-cmake/protoc+0x118256c) (BuildId: 6411d3c88b898ba3f7d49760555977d3e61f0741) + /// #7 0x73dc05dd7ea3 in __libc_start_main /usr/src/debug/glibc/glibc/csu/../csu/libc-start.c:343:6 + /// #8 0x5622c0d6b7cd in _start (./ClickHouse/build_msan/contrib/google-protobuf-cmake/protoc+0x22c7cd) (BuildId: 6411d3c88b898ba3f7d49760555977d3e61f0741) + + /// The source of the issue above is that, at this point in time during __msan_init, we can't really do much as + /// most global variables aren't initialized or available yet, so we we can't initiate the auxiliar vector. + /// Normal glibc / musl getauxval doesn't have this problem since they initiate their auxval vector at the very + /// start of __libc_start_main (just keeping track of argv+argc+1), but we don't have such option (otherwise + // this complexity of reading "/proc/self/auxv" or using __environ would not be necessary). + + /// To avoid this crashes on the re-exec call (see above how it would fail when creating `aux`, and it we used + /// __auxv_init_environ then it would SIGSEV on READing `__environ`) we capture this call for `AT_EXECFN` and + /// unconditionally return "/proc/self/exe" without any preparation. Theoretically this should be fine in + /// our case, as we don't load any libraries. That's the theory at least. + if (type == AT_EXECFN) + return (unsigned long)"/proc/self/exe"; +# endif +#endif + // For debugging: // - od -t dL /proc/self/auxv // - LD_SHOW_AUX= ls From fda11dc62d81b717b9ab06c8adc8554c827764bf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Wed, 24 Jul 2024 22:51:26 +0200 Subject: [PATCH 166/661] Typo --- base/glibc-compatibility/musl/getauxval.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/base/glibc-compatibility/musl/getauxval.c b/base/glibc-compatibility/musl/getauxval.c index 86f9a546ee4..b5bd2f114c2 100644 --- a/base/glibc-compatibility/musl/getauxval.c +++ b/base/glibc-compatibility/musl/getauxval.c @@ -99,7 +99,7 @@ static unsigned long NO_SANITIZE_THREAD __auxv_init_procfs(unsigned long type) /// #8 0x5622c0d6b7cd in _start (./ClickHouse/build_msan/contrib/google-protobuf-cmake/protoc+0x22c7cd) (BuildId: 6411d3c88b898ba3f7d49760555977d3e61f0741) /// The source of the issue above is that, at this point in time during __msan_init, we can't really do much as - /// most global variables aren't initialized or available yet, so we we can't initiate the auxiliar vector. 
+ /// most global variables aren't initialized or available yet, so we we can't initiate the auxiliary vector. /// Normal glibc / musl getauxval doesn't have this problem since they initiate their auxval vector at the very /// start of __libc_start_main (just keeping track of argv+argc+1), but we don't have such option (otherwise // this complexity of reading "/proc/self/auxv" or using __environ would not be necessary). From a6a9b8c27204f96e373c9625145dc1609cb7ca8f Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky <43110995+evillique@users.noreply.github.com> Date: Thu, 25 Jul 2024 00:49:28 +0200 Subject: [PATCH 167/661] Fix flaky 02447_drop_replica test --- tests/queries/0_stateless/02447_drop_database_replica.sh | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/queries/0_stateless/02447_drop_database_replica.sh b/tests/queries/0_stateless/02447_drop_database_replica.sh index 93a5fcee8e2..c6bf298f944 100755 --- a/tests/queries/0_stateless/02447_drop_database_replica.sh +++ b/tests/queries/0_stateless/02447_drop_database_replica.sh @@ -1,5 +1,9 @@ #!/usr/bin/env bash +# Tags: no-parallel +# no-parallel: This test is not parallel because when we execute system-wide SYSTEM DROP REPLICA, +# other tests might shut down the storage in parallel and the test will fail. + CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh From 8df648b3c8bbc22cee9657145b825e9d991e3c8e Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Thu, 25 Jul 2024 00:56:41 +0200 Subject: [PATCH 168/661] fix a test, add retries for sql tests --- src/Client/ClientBase.cpp | 12 ++++- src/Client/TestHint.cpp | 52 ++++++++++++++++++- src/Client/TestHint.h | 6 +++ .../02446_parent_zero_copy_locks.sql | 14 +++-- 4 files changed, 76 insertions(+), 8 deletions(-) diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 13dce05cabc..149e1899ac3 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -2230,6 +2230,8 @@ bool ClientBase::executeMultiQuery(const String & all_queries_text) ASTPtr parsed_query; std::unique_ptr current_exception; + size_t retries_count = 0; + while (true) { auto stage = analyzeMultiQueryText(this_query_begin, this_query_end, all_queries_end, @@ -2310,7 +2312,12 @@ bool ClientBase::executeMultiQuery(const String & all_queries_text) // Check whether the error (or its absence) matches the test hints // (or their absence). 
bool error_matches_hint = true; - if (have_error) + bool need_retry = test_hint.needRetry(server_exception, &retries_count); + if (need_retry) + { + std::this_thread::sleep_for(std::chrono::seconds(1)); + } + else if (have_error) { if (test_hint.hasServerErrors()) { @@ -2404,7 +2411,8 @@ bool ClientBase::executeMultiQuery(const String & all_queries_text) if (have_error && !ignore_error) return is_interactive; - this_query_begin = this_query_end; + if (!need_retry) + this_query_begin = this_query_end; break; } } diff --git a/src/Client/TestHint.cpp b/src/Client/TestHint.cpp index b64882577ee..74c65009a73 100644 --- a/src/Client/TestHint.cpp +++ b/src/Client/TestHint.cpp @@ -10,6 +10,7 @@ namespace DB::ErrorCodes { extern const int CANNOT_PARSE_TEXT; + extern const int OK; } namespace DB @@ -62,9 +63,28 @@ bool TestHint::hasExpectedServerError(int error) return std::find(server_errors.begin(), server_errors.end(), error) != server_errors.end(); } +bool TestHint::needRetry(const std::unique_ptr & server_exception, size_t * retries_counter) +{ + chassert(retries_counter); + if (max_retries <= *retries_counter) + return false; + + ++*retries_counter; + + int error = ErrorCodes::OK; + if (server_exception) + error = server_exception->code(); + + + if (retry_until) + return !hasExpectedServerError(error); /// retry until we get the expected error + else + return hasExpectedServerError(error); /// retry while we have the expected error +} + void TestHint::parse(Lexer & comment_lexer, bool is_leading_hint) { - std::unordered_set commands{"echo", "echoOn", "echoOff"}; + std::unordered_set commands{"echo", "echoOn", "echoOff", "retry"}; std::unordered_set command_errors{ "serverError", @@ -73,6 +93,9 @@ void TestHint::parse(Lexer & comment_lexer, bool is_leading_hint) for (Token token = comment_lexer.nextToken(); !token.isEnd(); token = comment_lexer.nextToken()) { + if (token.type == TokenType::Whitespace) + continue; + String item = String(token.begin, token.end); if (token.type == TokenType::BareWord && commands.contains(item)) { @@ -82,6 +105,30 @@ void TestHint::parse(Lexer & comment_lexer, bool is_leading_hint) echo.emplace(true); if (item == "echoOff") echo.emplace(false); + + if (item == "retry") + { + token = comment_lexer.nextToken(); + while (token.type == TokenType::Whitespace) + token = comment_lexer.nextToken(); + + if (token.type != TokenType::Number) + throw DB::Exception(DB::ErrorCodes::CANNOT_PARSE_TEXT, "Could not parse the number of retries: {}", + std::string_view(token.begin, token.end)); + + max_retries = std::stoul(std::string(token.begin, token.end)); + + token = comment_lexer.nextToken(); + while (token.type == TokenType::Whitespace) + token = comment_lexer.nextToken(); + + if (token.type != TokenType::BareWord || + (std::string_view(token.begin, token.end) != "until" && + std::string_view(token.begin, token.end) != "while")) + throw DB::Exception(DB::ErrorCodes::CANNOT_PARSE_TEXT, "Expected 'until' or 'while' after the number of retries, got: {}", + std::string_view(token.begin, token.end)); + retry_until = std::string_view(token.begin, token.end) == "until"; + } } else if (!is_leading_hint && token.type == TokenType::BareWord && command_errors.contains(item)) { @@ -133,6 +180,9 @@ void TestHint::parse(Lexer & comment_lexer, bool is_leading_hint) break; } } + + if (max_retries && server_errors.size() != 1) + throw DB::Exception(DB::ErrorCodes::CANNOT_PARSE_TEXT, "Expected one serverError after the 'retry N while|until' command"); } } diff --git a/src/Client/TestHint.h 
b/src/Client/TestHint.h index b76c4245df4..bbe7873c08b 100644 --- a/src/Client/TestHint.h +++ b/src/Client/TestHint.h @@ -6,6 +6,7 @@ #include #include +#include namespace DB @@ -65,12 +66,17 @@ public: bool hasExpectedClientError(int error); bool hasExpectedServerError(int error); + bool needRetry(const std::unique_ptr & server_exception, size_t * retries_counter); + private: const String & query; ErrorVector server_errors{}; ErrorVector client_errors{}; std::optional echo; + size_t max_retries = 0; + bool retry_until = false; + void parse(Lexer & comment_lexer, bool is_leading_hint); bool allErrorsExpected(int actual_server_error, int actual_client_error) const diff --git a/tests/queries/0_stateless/02446_parent_zero_copy_locks.sql b/tests/queries/0_stateless/02446_parent_zero_copy_locks.sql index 86eda526c72..1cae8ae0237 100644 --- a/tests/queries/0_stateless/02446_parent_zero_copy_locks.sql +++ b/tests/queries/0_stateless/02446_parent_zero_copy_locks.sql @@ -7,7 +7,7 @@ create table rmt2 (n int, m int, k int) engine=ReplicatedMergeTree('/test/02446/ settings storage_policy='s3_cache', allow_remote_fs_zero_copy_replication=1, old_parts_lifetime=0, cleanup_delay_period=0, max_cleanup_delay_period=1, cleanup_delay_period_random_add=1, min_bytes_for_wide_part=0; -- FIXME zero-copy locks may remain in ZooKeeper forever if we failed to insert a part. --- Probably that's why we have to replace repsistent lock with ephemeral sometimes. +-- Probably that's why we have to replace persistent lock with ephemeral sometimes. -- See also "Replacing persistent lock with ephemeral for path {}. It can happen only in case of local part loss" -- in StorageReplicatedMergeTree::createZeroCopyLockNode set insert_keeper_fault_injection_probability=0; @@ -23,6 +23,10 @@ select sleepEachRow(0.5) as test_does_not_rely_on_this; insert into rmt1 values(5, 5, 5); alter table rmt2 update m = m * 10 where 1 settings mutations_sync=2; +-- wait for parts to be merged +select throwIf(name = 'all_0_5_1_6') from system.parts where database=currentDatabase() and table like 'rmt%' and active +format Null; -- { retry 30 until serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } + system sync replica rmt2; set optimize_throw_if_noop=1; optimize table rmt2 final; @@ -32,10 +36,10 @@ select 1, * from rmt1 order by n; system sync replica rmt1; select 2, * from rmt2 order by n; --- a funny way to wait for outdated parts to be removed -select sleep(1), sleepEachRow(0.1) from url('http://localhost:8123/?param_tries={1..10}&query=' || encodeURLComponent( - 'select *, _state from system.parts where database=''' || currentDatabase() || ''' and table like ''rmt%'' and active=0' - ), 'LineAsString', 's String') settings max_threads=1 format Null; +-- wait for outdated parts to be removed +select throwIf(count() = 0) from ( +select *, _state from system.parts where database=currentDatabase() and table like 'rmt%' and active=0 +) format Null; -- { retry 30 until serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } select *, _state from system.parts where database=currentDatabase() and table like 'rmt%' and active=0; From 7612060d232a24dbd721597c8e33cd1f556cddd6 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Thu, 25 Jul 2024 06:40:51 +0000 Subject: [PATCH 169/661] allow only equal types in lagInFrame and leadInFrame --- src/Processors/Transforms/WindowTransform.cpp | 15 ++------------ .../03210_lag_lead_inframe_types.reference | 20 +++++++++++++++++++ .../03210_lag_lead_inframe_types.sql | 4 ++++ 3 files changed, 26 insertions(+), 13 
deletions(-) create mode 100644 tests/queries/0_stateless/03210_lag_lead_inframe_types.reference create mode 100644 tests/queries/0_stateless/03210_lag_lead_inframe_types.sql diff --git a/src/Processors/Transforms/WindowTransform.cpp b/src/Processors/Transforms/WindowTransform.cpp index 86421adf4fb..06ae2bfb25e 100644 --- a/src/Processors/Transforms/WindowTransform.cpp +++ b/src/Processors/Transforms/WindowTransform.cpp @@ -2385,22 +2385,11 @@ struct WindowFunctionLagLeadInFrame final : public WindowFunction return; } - const auto supertype = getLeastSupertype(DataTypes{argument_types[0], argument_types[2]}); - if (!supertype) - { + if (!argument_types[0]->equals(*argument_types[2])) throw Exception(ErrorCodes::BAD_ARGUMENTS, - "There is no supertype for the argument type '{}' and the default value type '{}'", + "Argument type '{}' and the default value type '{}' are different", argument_types[0]->getName(), argument_types[2]->getName()); - } - if (!argument_types[0]->equals(*supertype)) - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, - "The supertype '{}' for the argument type '{}' and the default value type '{}' is not the same as the argument type", - supertype->getName(), - argument_types[0]->getName(), - argument_types[2]->getName()); - } if (argument_types.size() > 3) { diff --git a/tests/queries/0_stateless/03210_lag_lead_inframe_types.reference b/tests/queries/0_stateless/03210_lag_lead_inframe_types.reference new file mode 100644 index 00000000000..cc3b9a096b9 --- /dev/null +++ b/tests/queries/0_stateless/03210_lag_lead_inframe_types.reference @@ -0,0 +1,20 @@ +0 +1 +2 +2 +2 +2 +2 +2 +2 +2 +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 diff --git a/tests/queries/0_stateless/03210_lag_lead_inframe_types.sql b/tests/queries/0_stateless/03210_lag_lead_inframe_types.sql new file mode 100644 index 00000000000..5466cfe0fad --- /dev/null +++ b/tests/queries/0_stateless/03210_lag_lead_inframe_types.sql @@ -0,0 +1,4 @@ +SELECT lagInFrame(2::UInt128, 2, number) OVER w FROM numbers(10) WINDOW w AS (ORDER BY number); -- { serverError BAD_ARGUMENTS } +SELECT leadInFrame(2::UInt128, 2, number) OVER w FROM numbers(10) WINDOW w AS (ORDER BY number); -- { serverError BAD_ARGUMENTS } +SELECT lagInFrame(2::UInt64, 2, number) OVER w FROM numbers(10) WINDOW w AS (ORDER BY number); +SELECT leadInFrame(2::UInt64, 2, number) OVER w FROM numbers(10) WINDOW w AS (ORDER BY number); From c6a643f981505d0293358c912723f1aece480c7c Mon Sep 17 00:00:00 2001 From: Nikita Fomichev Date: Thu, 25 Jul 2024 10:31:38 +0200 Subject: [PATCH 170/661] Update tests/queries/0_stateless/02992_all_columns_should_have_comment.sql --- .../0_stateless/02992_all_columns_should_have_comment.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02992_all_columns_should_have_comment.sql b/tests/queries/0_stateless/02992_all_columns_should_have_comment.sql index 0d34b033354..ad056384bfd 100644 --- a/tests/queries/0_stateless/02992_all_columns_should_have_comment.sql +++ b/tests/queries/0_stateless/02992_all_columns_should_have_comment.sql @@ -3,6 +3,6 @@ SELECT 'Column ' || name || ' from table ' || concat(database, '.', table) || ' FROM system.columns WHERE (database = 'system') AND (comment = '') AND - (table NOT ILIKE '%\_log\_%') AND + (table NOT ILIKE '%log%') AND (table NOT IN ('numbers', 'numbers_mt', 'one', 'generate_series', 'generateSeries', 'coverage_log', 'filesystem_read_prefetches_log')) AND (default_kind != 'ALIAS'); From b5171df7798323761b366f01d401c0559ff4c736 Mon Sep 17 00:00:00 2001 From: 
Nikita Fomichev Date: Thu, 25 Jul 2024 10:32:52 +0200 Subject: [PATCH 171/661] Update test 03198_table_function_directory_path.sql --- .../0_stateless/03198_table_function_directory_path.reference | 1 + .../queries/0_stateless/03198_table_function_directory_path.sql | 2 ++ 2 files changed, 3 insertions(+) diff --git a/tests/queries/0_stateless/03198_table_function_directory_path.reference b/tests/queries/0_stateless/03198_table_function_directory_path.reference index 19920de3d3c..74cd8c6d31f 100644 --- a/tests/queries/0_stateless/03198_table_function_directory_path.reference +++ b/tests/queries/0_stateless/03198_table_function_directory_path.reference @@ -1,3 +1,4 @@ 2 2 1 +1 diff --git a/tests/queries/0_stateless/03198_table_function_directory_path.sql b/tests/queries/0_stateless/03198_table_function_directory_path.sql index 9e2791847af..90f687ed6a3 100644 --- a/tests/queries/0_stateless/03198_table_function_directory_path.sql +++ b/tests/queries/0_stateless/03198_table_function_directory_path.sql @@ -1,5 +1,6 @@ -- Tags: no-parallel +INSERT INTO FUNCTION file('data_03198_table_function_directory_path.csv', 'csv') SELECT '1.csv' SETTINGS engine_file_truncate_on_insert=1; INSERT INTO FUNCTION file('data_03198_table_function_directory_path/1.csv', 'csv') SELECT '1.csv' SETTINGS engine_file_truncate_on_insert=1; INSERT INTO FUNCTION file('data_03198_table_function_directory_path/2.csv', 'csv') SELECT '2.csv' SETTINGS engine_file_truncate_on_insert=1; INSERT INTO FUNCTION file('data_03198_table_function_directory_path/dir/3.csv', 'csv') SELECT '3.csv' SETTINGS engine_file_truncate_on_insert=1; @@ -11,3 +12,4 @@ SELECT COUNT(*) FROM file('data_03198_table_function_directory_path/'); SELECT COUNT(*) FROM file('data_03198_table_function_directory_path/dir'); SELECT COUNT(*) FROM file('data_03198_table_function_directory_path/*/dir', 'csv'); -- { serverError 74, 636 } SELECT COUNT(*) FROM file('data_03198_table_function_directory_pat'); -- { serverError 400 } +SELECT COUNT(*) FROM file('data_03198_table_function_directory_path.csv'); From 60f529f667069c15fa49296ac1f59a33d94d3f31 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Thu, 25 Jul 2024 11:06:00 +0200 Subject: [PATCH 172/661] The most precise way of tracking flushing time in 01246_buffer_flush Right now there are couple of issues with the test: - it does not takes into account INSERT time - it does not takes into account SELECT time, which can be significant from time to time, for instance here [1] it takes 3.3 seconds (and due to tsan build it is not possible to find out why) 2024.07.23 20:52:18.238844 [ 13045 ] {d903650b-ab87-44f3-b7c3-4145e02f1301} executeQuery: (from [::1]:39430) (comment: 01246_buffer_flush.sh) select count() from data_01256; (stage: Complete) 2024.07.23 20:52:21.588183 [ 13045 ] {d903650b-ab87-44f3-b7c3-4145e02f1301} TCPHandler: Processed in 3.354887498 sec. [1]: https://s3.amazonaws.com/clickhouse-test-reports/66934/919005c4f70b044ecd9cc1bbce5dc5e276e11929/stateless_tests__tsan__s3_storage__[4_4].html Anyway all of this can be fixed by using QueryStart-insert into data table time. 
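That is, instead of timing a client-side polling loop, the flush delay is taken as the difference between the QueryStart event of the INSERT into the Buffer table (from system.query_log) and the newest event_time that actually reached the destination table. Simplified from the helper added below ($insert_query_id here is just an illustrative variable name):

    $CLICKHOUSE_CLIENT -nm -q "
        SYSTEM FLUSH LOGS;
        WITH
            (SELECT event_time_microseconds FROM system.query_log
              WHERE type = 'QueryStart' AND query_id = '$insert_query_id') AS begin_,
            (SELECT max(event_time) FROM data_01256) AS end_
        SELECT dateDiff('seconds', begin_, end_);
    "

This way neither the INSERT itself nor a slow SELECT can inflate the measured flush time.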
Signed-off-by: Azat Khuzhin --- .../queries/0_stateless/01246_buffer_flush.sh | 49 ++++++++++++------- 1 file changed, 31 insertions(+), 18 deletions(-) diff --git a/tests/queries/0_stateless/01246_buffer_flush.sh b/tests/queries/0_stateless/01246_buffer_flush.sh index 1ca953c80d9..27c3f01f216 100755 --- a/tests/queries/0_stateless/01246_buffer_flush.sh +++ b/tests/queries/0_stateless/01246_buffer_flush.sh @@ -5,59 +5,72 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -function elapsed_sec() +set -e + +function wait_until() { local expr=$1 && shift - local start end - start=$(date +%s.%N) while ! eval "$expr"; do sleep 0.5 done - end=$(date +%s.%N) - $CLICKHOUSE_LOCAL -q "select floor($end-$start)" +} +function get_buffer_delay() +{ + local buffer_insert_id=$1 && shift + $CLICKHOUSE_CLIENT -nm -q " + SYSTEM FLUSH LOGS; + WITH + (SELECT event_time_microseconds FROM system.query_log WHERE current_database = currentDatabase() AND type = 'QueryStart' AND query_id = '$buffer_insert_id') AS begin_, + (SELECT max(event_time) FROM data_01256) AS end_ + SELECT dateDiff('seconds', begin_, end_)::UInt64; + " } $CLICKHOUSE_CLIENT -nm -q " drop table if exists data_01256; drop table if exists buffer_01256; - create table data_01256 as system.numbers Engine=Memory(); + create table data_01256 (key UInt64, event_time DateTime(6) MATERIALIZED now64(6)) Engine=Memory(); " echo "min" -$CLICKHOUSE_CLIENT -nm -q " - create table buffer_01256 as system.numbers Engine=Buffer(currentDatabase(), data_01256, 1, +$CLICKHOUSE_CLIENT -q " + create table buffer_01256 (key UInt64) Engine=Buffer(currentDatabase(), data_01256, 1, 2, 100, /* time */ 4, 100, /* rows */ 1, 1e6 /* bytes */ - ); - insert into buffer_01256 select * from system.numbers limit 5; - select count() from data_01256; + ) " -sec=$(elapsed_sec '[[ $($CLICKHOUSE_CLIENT -q "select count() from data_01256") -eq 5 ]]') +min_query_id=$(random_str 10) +$CLICKHOUSE_CLIENT --query_id="$min_query_id" -q "insert into buffer_01256 select * from system.numbers limit 5" +$CLICKHOUSE_CLIENT -q "select count() from data_01256" +wait_until '[[ $($CLICKHOUSE_CLIENT -q "select count() from data_01256") -eq 5 ]]' +sec=$(get_buffer_delay "$min_query_id") [[ $sec -ge 2 ]] || echo "Buffer flushed too early, min_time=2, flushed after $sec sec" [[ $sec -lt 100 ]] || echo "Buffer flushed too late, max_time=100, flushed after $sec sec" $CLICKHOUSE_CLIENT -q "select count() from data_01256" $CLICKHOUSE_CLIENT -q "drop table buffer_01256" echo "max" -$CLICKHOUSE_CLIENT -nm -q " - create table buffer_01256 as system.numbers Engine=Buffer(currentDatabase(), data_01256, 1, +$CLICKHOUSE_CLIENT -q " + create table buffer_01256 (key UInt64) Engine=Buffer(currentDatabase(), data_01256, 1, 100, 2, /* time */ 0, 100, /* rows */ 0, 1e6 /* bytes */ ); - insert into buffer_01256 select * from system.numbers limit 5; - select count() from data_01256; " -sec=$(elapsed_sec '[[ $($CLICKHOUSE_CLIENT -q "select count() from data_01256") -eq 10 ]]') +max_query_id=$(random_str 10) +$CLICKHOUSE_CLIENT --query_id="$max_query_id" -q "insert into buffer_01256 select * from system.numbers limit 5" +$CLICKHOUSE_CLIENT -q "select count() from data_01256" +wait_until '[[ $($CLICKHOUSE_CLIENT -q "select count() from data_01256") -eq 10 ]]' +sec=$(get_buffer_delay "$max_query_id") [[ $sec -ge 2 ]] || echo "Buffer flushed too early, max_time=2, flushed after $sec sec" $CLICKHOUSE_CLIENT -q "select count() from data_01256" 
$CLICKHOUSE_CLIENT -q "drop table buffer_01256" echo "direct" $CLICKHOUSE_CLIENT -nm -q " - create table buffer_01256 as system.numbers Engine=Buffer(currentDatabase(), data_01256, 1, + create table buffer_01256 (key UInt64) Engine=Buffer(currentDatabase(), data_01256, 1, 100, 100, /* time */ 0, 9, /* rows */ 0, 1e6 /* bytes */ From c2f85c6fd062dde095ee34178450dc94c245e691 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Thu, 25 Jul 2024 17:43:02 +0800 Subject: [PATCH 173/661] support map type as first argument type --- .../functions/tuple-map-functions.md | 2 +- src/Functions/map.cpp | 131 +++++++++++------- .../0_stateless/01651_map_functions.reference | 3 + .../0_stateless/01651_map_functions.sql | 9 +- 4 files changed, 89 insertions(+), 56 deletions(-) diff --git a/docs/en/sql-reference/functions/tuple-map-functions.md b/docs/en/sql-reference/functions/tuple-map-functions.md index 24b356eca87..ae23387f6e5 100644 --- a/docs/en/sql-reference/functions/tuple-map-functions.md +++ b/docs/en/sql-reference/functions/tuple-map-functions.md @@ -62,7 +62,7 @@ Alias: `MAP_FROM_ARRAYS(keys, values)` **Arguments** -- `keys` — Array of keys to create the map from. [Array(T)](../data-types/array.md) where `T` can be any type supported by [Map](../data-types/map.md) as key type. +- `keys` — Array or map of keys to create the map from. [Array(T)](../data-types/array.md) where `T` can be any type supported by [Map](../data-types/map.md) as key type. - `values` - Array or map of values to create the map from. [Array](../data-types/array.md) or [Map](../data-types/map.md). **Returned value** diff --git a/src/Functions/map.cpp b/src/Functions/map.cpp index 66cd10a3f0b..5319390fb70 100644 --- a/src/Functions/map.cpp +++ b/src/Functions/map.cpp @@ -1,14 +1,17 @@ -#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include #include -#include -#include -#include -#include -#include -#include -#include +#include #include +#include #include @@ -178,22 +181,28 @@ public: getName(), arguments.size()); - /// The first argument should always be Array. 
- /// Because key type can not be nested type of Map, which is Tuple - DataTypePtr key_type; - if (const auto * keys_type = checkAndGetDataType(arguments[0].get())) - key_type = keys_type->getNestedType(); - else - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "First argument for function {} must be an Array", getName()); + auto get_nested_type = [this](const DataTypePtr & type) -> DataTypePtr + { + DataTypePtr nested; + if (const auto * array_type = checkAndGetDataType(type.get())) + nested = array_type->getNestedType(); + else if (const auto * map_type = checkAndGetDataType(type.get())) + nested = std::make_shared(map_type->getKeyValueTypes()); + else + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Argument types of function {} must be Array or Map, but {} is given", + getName(), + type->getName()); - DataTypePtr value_type; - if (const auto * value_array_type = checkAndGetDataType(arguments[1].get())) - value_type = value_array_type->getNestedType(); - else if (const auto * value_map_type = checkAndGetDataType(arguments[1].get())) - value_type = std::make_shared(value_map_type->getKeyValueTypes()); - else - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Second argument for function {} must be Array or Map", getName()); + return nested; + }; + auto key_type = get_nested_type(arguments[0]); + auto value_type = get_nested_type(arguments[1]); + + /// Remove Nullable from key_type if needed for map key must not be Nullable + key_type = removeNullableOrLowCardinalityNullable(key_type); DataTypes key_value_types{key_type, value_type}; return std::make_shared(key_value_types); } @@ -201,44 +210,62 @@ public: ColumnPtr executeImpl( const ColumnsWithTypeAndName & arguments, const DataTypePtr & /* result_type */, size_t /* input_rows_count */) const override { - bool is_keys_const = isColumnConst(*arguments[0].column); - ColumnPtr holder_keys; - const ColumnArray * col_keys; - if (is_keys_const) + auto get_array_column = [this](const ColumnPtr & column) -> std::pair { - holder_keys = arguments[0].column->convertToFullColumnIfConst(); - col_keys = checkAndGetColumn(holder_keys.get()); - } - else + bool is_const = isColumnConst(*column); + ColumnPtr holder = is_const ? column->convertToFullColumnIfConst() : column; + + const ColumnArray * col_res = nullptr; + if (const auto * col_array = checkAndGetColumn(holder.get())) + col_res = col_array; + else if (const auto * col_map = checkAndGetColumn(holder.get())) + col_res = &col_map->getNestedColumn(); + else + throw Exception( + ErrorCodes::ILLEGAL_COLUMN, + "Argument columns of function {} must be Array or Map, but {} is given", + getName(), + holder->getName()); + + return {col_res, holder}; + }; + + auto [col_keys, key_holder] = get_array_column(arguments[0].column); + + /// Check if nested column of first argument contains NULL value in case its nested type is Nullable(T) type. 
+ ColumnPtr data_keys = col_keys->getDataPtr(); + if (isColumnNullableOrLowCardinalityNullable(*data_keys)) { - col_keys = checkAndGetColumn(arguments[0].column.get()); + std::cout << "data keys is nullable" << std::endl; + const NullMap * null_map = nullptr; + if (const auto * nullable = checkAndGetColumn(data_keys.get())) + { + null_map = &nullable->getNullMapData(); + data_keys = nullable->getNestedColumnPtr(); + } + else if (const auto * low_cardinality = checkAndGetColumn(data_keys.get())) + { + if (const auto * nullable_dict = checkAndGetColumn(low_cardinality->getDictionaryPtr().get())) + { + null_map = &nullable_dict->getNullMapData(); + data_keys = ColumnLowCardinality::create(nullable_dict->getNestedColumnPtr(), low_cardinality->getIndexesPtr()); + } + } + + if (null_map && !memoryIsZero(null_map->data(), 0, null_map->size())) + throw Exception( + ErrorCodes::ILLEGAL_COLUMN, "The nested column of first argument in function {} must not contain NULLs", getName()); } - if (!col_keys) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "The first argument of function {} must be Array", getName()); - - bool is_values_const = isColumnConst(*arguments[1].column); - ColumnPtr holder_values; - if (is_values_const) - holder_values = arguments[1].column->convertToFullColumnIfConst(); - else - holder_values = arguments[1].column; - - const ColumnArray * col_values; - if (const auto * col_values_array = checkAndGetColumn(holder_values.get())) - col_values = col_values_array; - else if (const auto * col_values_map = checkAndGetColumn(holder_values.get())) - col_values = &col_values_map->getNestedColumn(); - else - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "The second arguments of function {} must be Array or Map", getName()); - + auto [col_values, values_holder] = get_array_column(arguments[1].column); if (!col_keys->hasEqualOffsets(*col_values)) - throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH, "Two arguments for function {} must have equal sizes", getName()); + throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH, "Two arguments of function {} must have equal sizes", getName()); - const auto & data_keys = col_keys->getDataPtr(); const auto & data_values = col_values->getDataPtr(); const auto & offsets = col_keys->getOffsetsPtr(); - auto nested_column = ColumnArray::create(ColumnTuple::create(Columns{data_keys, data_values}), offsets); + std::cout << "before create array:" << "offsets:" << offsets->getName() << std::endl; + auto nested_column = ColumnArray::create(ColumnTuple::create(Columns{std::move(data_keys), data_values}), offsets); + std::cout << "after create array:" << "offsets:" << offsets->getName() << std::endl; return ColumnMap::create(nested_column); } }; diff --git a/tests/queries/0_stateless/01651_map_functions.reference b/tests/queries/0_stateless/01651_map_functions.reference index 471da5586b7..9114aa419b1 100644 --- a/tests/queries/0_stateless/01651_map_functions.reference +++ b/tests/queries/0_stateless/01651_map_functions.reference @@ -52,3 +52,6 @@ {1:4,2:5} {1:4,2:5} {1:4,2:5} +{1:3,2:4} +{1:3,2:4} +{1:3,2:4} {(1,3):'a',(2,4):'b'} diff --git a/tests/queries/0_stateless/01651_map_functions.sql b/tests/queries/0_stateless/01651_map_functions.sql index cf2460fce2c..4604ddd6db1 100644 --- a/tests/queries/0_stateless/01651_map_functions.sql +++ b/tests/queries/0_stateless/01651_map_functions.sql @@ -67,12 +67,15 @@ select mapFromArrays(['aa', 'bb'], [4, 5, 6]); -- { serverError SIZES_OF_ARRAYS_ select mapFromArrays([[1,2], [3,4]], [4, 5, 6]); -- { serverError 
SIZES_OF_ARRAYS_DONT_MATCH } select mapFromArrays(['a', 2], [4, 5]); -- { serverError NO_COMMON_TYPE} select mapFromArrays([1, 2], [4, 'a']); -- { serverError NO_COMMON_TYPE} +select mapFromArrays(['aa', 'bb'], map('a', 4)); -- { serverError SIZES_OF_ARRAYS_DONT_MATCH } +select mapFromArrays([1,null]::Array(Nullable(UInt8)), [3,4]); -- { serverError ILLEGAL_COLUMN } select mapFromArrays(['aa', 'bb'], map('a', 4, 'b', 5)); select mapFromArrays(['aa', 'bb'], materialize(map('a', 4, 'b', 5))) from numbers(2); -select mapFromArrays(map('a', 4, 'b', 4), ['aa', 'bb']) from numbers(2); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } -select mapFromArrays(['aa', 'bb'], map('a', 4)); -- { serverError SIZES_OF_ARRAYS_DONT_MATCH } - select mapFromArrays([toLowCardinality(1), toLowCardinality(2)], [4, 5]); select mapFromArrays([toLowCardinality(1), toLowCardinality(2)], materialize([4, 5])) from numbers(2); + +select mapFromArrays([1,2], [3,4]); +select mapFromArrays([1,2]::Array(Nullable(UInt8)), [3,4]); +select mapFromArrays([1,2], [3,4]) as x, mapFromArrays(x, ['a', 'b']); From 65573871485c2e8ca45d791551856fd2f8622cf9 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Thu, 25 Jul 2024 11:05:36 +0200 Subject: [PATCH 174/661] Try calcualting memory with numactl if membind used --- .gitmodules | 3 + base/base/CMakeLists.txt | 4 ++ base/base/getMemoryAmount.cpp | 26 +++++++- contrib/CMakeLists.txt | 2 + contrib/numactl | 1 + contrib/numactl-cmake/CMakeLists.txt | 20 +++++++ contrib/numactl-cmake/include/config.h | 82 ++++++++++++++++++++++++++ programs/server/Server.cpp | 27 +++++++++ src/Common/config.h.in | 1 + src/configure_config.cmake | 3 + 10 files changed, 168 insertions(+), 1 deletion(-) create mode 160000 contrib/numactl create mode 100644 contrib/numactl-cmake/CMakeLists.txt create mode 100644 contrib/numactl-cmake/include/config.h diff --git a/.gitmodules b/.gitmodules index 12d865307d8..b5d7e1e56b3 100644 --- a/.gitmodules +++ b/.gitmodules @@ -372,3 +372,6 @@ [submodule "contrib/double-conversion"] path = contrib/double-conversion url = https://github.com/ClickHouse/double-conversion.git +[submodule "contrib/numactl"] + path = contrib/numactl + url = https://github.com/numactl/numactl.git diff --git a/base/base/CMakeLists.txt b/base/base/CMakeLists.txt index 159502c9735..451a6eb5e8b 100644 --- a/base/base/CMakeLists.txt +++ b/base/base/CMakeLists.txt @@ -46,6 +46,10 @@ if (TARGET ch_contrib::crc32_s390x) target_link_libraries(common PUBLIC ch_contrib::crc32_s390x) endif() +if (TARGET ch_contrib::numactl) + target_link_libraries(common PUBLIC ch_contrib::numactl) +endif() + target_include_directories(common PUBLIC .. 
"${CMAKE_CURRENT_BINARY_DIR}/..") target_link_libraries (common diff --git a/base/base/getMemoryAmount.cpp b/base/base/getMemoryAmount.cpp index afdb6ba068a..b8162146496 100644 --- a/base/base/getMemoryAmount.cpp +++ b/base/base/getMemoryAmount.cpp @@ -4,12 +4,17 @@ #include #include -#include #include #include #include +#include "config.h" + +#if USE_NUMACTL +#include +#endif + namespace { @@ -63,6 +68,25 @@ uint64_t getMemoryAmountOrZero() uint64_t memory_amount = num_pages * page_size; +#if USE_NUMACTL + if (numa_available() != -1) + { + auto * membind = numa_get_membind(); + if (!numa_bitmask_equal(membind, numa_all_nodes_ptr)) + { + uint64_t total_numa_memory = 0; + auto max_node = numa_max_node(); + for (int i = 0; i <= max_node; ++i) + { + if (numa_bitmask_isbitset(membind, i)) + total_numa_memory += numa_node_size(i, nullptr); + } + + memory_amount = total_numa_memory; + } + } +#endif + /// Respect the memory limit set by cgroups v2. auto limit_v2 = getCgroupsV2MemoryLimit(); if (limit_v2.has_value() && *limit_v2 < memory_amount) diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index 90ae5981a21..977efda15ff 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -230,6 +230,8 @@ add_contrib (libssh-cmake libssh) add_contrib (prometheus-protobufs-cmake prometheus-protobufs prometheus-protobufs-gogo) +add_contrib(numactl-cmake numactl) + # Put all targets defined here and in subdirectories under "contrib/" folders in GUI-based IDEs. # Some of third-party projects may override CMAKE_FOLDER or FOLDER property of their targets, so they would not appear # in "contrib/..." as originally planned, so we workaround this by fixing FOLDER properties of all targets manually, diff --git a/contrib/numactl b/contrib/numactl new file mode 160000 index 00000000000..3871b1c42fc --- /dev/null +++ b/contrib/numactl @@ -0,0 +1 @@ +Subproject commit 3871b1c42fc71bceadafd745d2eff5dddfc2d67e diff --git a/contrib/numactl-cmake/CMakeLists.txt b/contrib/numactl-cmake/CMakeLists.txt new file mode 100644 index 00000000000..5d086366c7f --- /dev/null +++ b/contrib/numactl-cmake/CMakeLists.txt @@ -0,0 +1,20 @@ +option (ENABLE_NUMACTL "Enable numactl" ${ENABLE_LIBRARIES}) + +if (NOT ENABLE_NUMACTL) + message (STATUS "Not using numactl") + return() +endif () + +set (LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/numactl") + +set (SRCS + "${LIBRARY_DIR}/libnuma.c" + "${LIBRARY_DIR}/syscall.c" +) + +add_library(_numactl ${SRCS}) + +target_include_directories(_numactl SYSTEM PRIVATE include) +target_include_directories(_numactl SYSTEM PUBLIC "${LIBRARY_DIR}") + +add_library(ch_contrib::numactl ALIAS _numactl) diff --git a/contrib/numactl-cmake/include/config.h b/contrib/numactl-cmake/include/config.h new file mode 100644 index 00000000000..a304db38e53 --- /dev/null +++ b/contrib/numactl-cmake/include/config.h @@ -0,0 +1,82 @@ +/* config.h. Generated from config.h.in by configure. */ +/* config.h.in. Generated from configure.ac by autoheader. */ + +/* Checking for symver attribute */ +#define HAVE_ATTRIBUTE_SYMVER 0 + +/* Define to 1 if you have the header file. */ +#define HAVE_DLFCN_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_INTTYPES_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_STDINT_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_STDIO_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_STDLIB_H 1 + +/* Define to 1 if you have the header file. 
*/ +#define HAVE_STRINGS_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_STRING_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_SYS_STAT_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_SYS_TYPES_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_UNISTD_H 1 + +/* Define to the sub-directory where libtool stores uninstalled libraries. */ +#define LT_OBJDIR ".libs/" + +/* Name of package */ +#define PACKAGE "numactl" + +/* Define to the address where bug reports for this package should be sent. */ +#define PACKAGE_BUGREPORT "" + +/* Define to the full name of this package. */ +#define PACKAGE_NAME "numactl" + +/* Define to the full name and version of this package. */ +#define PACKAGE_STRING "numactl 2.1" + +/* Define to the one symbol short name of this package. */ +#define PACKAGE_TARNAME "numactl" + +/* Define to the home page for this package. */ +#define PACKAGE_URL "" + +/* Define to the version of this package. */ +#define PACKAGE_VERSION "2.1" + +/* Define to 1 if all of the C89 standard headers exist (not just the ones + required in a freestanding environment). This macro is provided for + backward compatibility; new code need not use it. */ +#define STDC_HEADERS 1 + +/* If the compiler supports a TLS storage class define it to that here */ +#define TLS __thread + +/* Version number of package */ +#define VERSION "2.1" + +/* Number of bits in a file offset, on hosts where this is settable. */ +/* #undef _FILE_OFFSET_BITS */ + +/* Define to 1 on platforms where this makes off_t a 64-bit type. */ +/* #undef _LARGE_FILES */ + +/* Number of bits in time_t, on hosts where this is settable. */ +/* #undef _TIME_BITS */ + +/* Define to 1 on platforms where this makes time_t a 64-bit type. */ +/* #undef __MINGW_USE_VC2005_COMPAT */ diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 16888015f8b..619a72ff200 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -140,6 +140,11 @@ # include #endif +#if USE_NUMACTL +#include +#endif + + #include /// A minimal file used when the server is run without installation INCBIN(resource_embedded_xml, SOURCE_DIR "/programs/server/embedded.xml"); @@ -754,6 +759,28 @@ try setenv("OPENSSL_CONF", config_dir.c_str(), true); /// NOLINT } +#if USE_NUMACTL + if (numa_available() != -1) + { + auto * membind = numa_get_membind(); + if (!numa_bitmask_equal(membind, numa_all_nodes_ptr)) + { + uint64_t total_numa_memory = 0; + auto max_node = numa_max_node(); + for (int i = 0; i <= max_node; ++i) + { + if (numa_bitmask_isbitset(membind, i)) + total_numa_memory += numa_node_size(i, nullptr); + } + + LOG_INFO( + log, + "ClickHouse is bound to a subset of NUMA nodes. Total memory of all available nodes {}", + ReadableSize(total_numa_memory)); + } + } +#endif + registerInterpreters(); registerFunctions(); registerAggregateFunctions(); diff --git a/src/Common/config.h.in b/src/Common/config.h.in index f68701d5d10..6a0090130a3 100644 --- a/src/Common/config.h.in +++ b/src/Common/config.h.in @@ -64,6 +64,7 @@ #cmakedefine01 USE_LIBARCHIVE #cmakedefine01 USE_POCKETFFT #cmakedefine01 USE_PROMETHEUS_PROTOBUFS +#cmakedefine01 USE_NUMACTL /// This is needed for .incbin in assembly. For some reason, include paths don't work there in presence of LTO. /// That's why we use absolute paths. 
diff --git a/src/configure_config.cmake b/src/configure_config.cmake index 75f61baa854..d22bf674df4 100644 --- a/src/configure_config.cmake +++ b/src/configure_config.cmake @@ -173,5 +173,8 @@ endif() if (TARGET ch_contrib::prometheus_protobufs) set(USE_PROMETHEUS_PROTOBUFS 1) endif() +if (TARGET ch_contrib::numactl) + set(USE_NUMACTL 1) +endif() set(SOURCE_DIR ${PROJECT_SOURCE_DIR}) From e181ccd0173c46d31867097532f64df0be3944da Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Thu, 25 Jul 2024 17:53:51 +0800 Subject: [PATCH 175/661] update doc --- docs/en/sql-reference/functions/tuple-map-functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/functions/tuple-map-functions.md b/docs/en/sql-reference/functions/tuple-map-functions.md index ae23387f6e5..db66188b1f5 100644 --- a/docs/en/sql-reference/functions/tuple-map-functions.md +++ b/docs/en/sql-reference/functions/tuple-map-functions.md @@ -62,7 +62,7 @@ Alias: `MAP_FROM_ARRAYS(keys, values)` **Arguments** -- `keys` — Array or map of keys to create the map from. [Array(T)](../data-types/array.md) where `T` can be any type supported by [Map](../data-types/map.md) as key type. +- `keys` — Array or map of keys to create the map from. [Array(T)](../data-types/array.md) where `T` can be any type supported by [Map](../data-types/map.md) as key type, or [Map](../data-types/map.md). - `values` - Array or map of values to create the map from. [Array](../data-types/array.md) or [Map](../data-types/map.md). **Returned value** From 6968945373b2a73c135b0025cf892e21a2af4dbf Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Thu, 25 Jul 2024 09:58:32 +0000 Subject: [PATCH 176/661] Functions [s-t]*: Iterate over input_rows_count where appropriate --- src/Functions/FunctionTokens.h | 9 +- src/Functions/seriesDecomposeSTL.cpp | 4 +- src/Functions/space.cpp | 50 +++---- src/Functions/stem.cpp | 9 +- src/Functions/stringCutToZero.cpp | 21 ++- src/Functions/substringIndex.cpp | 22 ++-- src/Functions/subtractNanoseconds.cpp | 1 + src/Functions/throwIf.cpp | 2 +- src/Functions/timeSlots.cpp | 80 ++++++------ src/Functions/toDecimalString.cpp | 64 ++++----- src/Functions/toStartOfInterval.cpp | 44 +++---- src/Functions/tokenExtractors.cpp | 18 +-- src/Functions/transform.cpp | 167 ++++++++++++------------ src/Functions/translate.cpp | 16 ++- src/Functions/tupleToNameValuePairs.cpp | 6 +- 15 files changed, 251 insertions(+), 262 deletions(-) diff --git a/src/Functions/FunctionTokens.h b/src/Functions/FunctionTokens.h index f1435ca5651..b6d8e9ee589 100644 --- a/src/Functions/FunctionTokens.h +++ b/src/Functions/FunctionTokens.h @@ -84,7 +84,7 @@ public: return std::make_shared(std::make_shared()); } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override { Generator generator; generator.init(arguments, max_substrings_includes_remaining_string); @@ -107,18 +107,17 @@ public: const ColumnString::Chars & src_chars = col_str->getChars(); const ColumnString::Offsets & src_offsets = col_str->getOffsets(); - res_offsets.reserve(src_offsets.size()); - res_strings_offsets.reserve(src_offsets.size() * 5); /// Constant 5 - at random. + res_offsets.reserve(input_rows_count); + res_strings_offsets.reserve(input_rows_count * 5); /// Constant 5 - at random. 
res_strings_chars.reserve(src_chars.size()); Pos token_begin = nullptr; Pos token_end = nullptr; - size_t size = src_offsets.size(); ColumnString::Offset current_src_offset = 0; ColumnArray::Offset current_dst_offset = 0; ColumnString::Offset current_dst_strings_offset = 0; - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { Pos pos = reinterpret_cast(&src_chars[current_src_offset]); current_src_offset = src_offsets[i]; diff --git a/src/Functions/seriesDecomposeSTL.cpp b/src/Functions/seriesDecomposeSTL.cpp index 720aa1e0799..1e1c41cafad 100644 --- a/src/Functions/seriesDecomposeSTL.cpp +++ b/src/Functions/seriesDecomposeSTL.cpp @@ -50,7 +50,7 @@ public: return std::make_shared(std::make_shared(std::make_shared())); } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { ColumnPtr array_ptr = arguments[0].column; const ColumnArray * array = checkAndGetColumn(array_ptr.get()); @@ -79,7 +79,7 @@ public: ColumnArray::Offset prev_src_offset = 0; - for (size_t i = 0; i < src_offsets.size(); ++i) + for (size_t i = 0; i < input_rows_count; ++i) { UInt64 period; auto period_ptr = arguments[1].column->convertToFullColumnIfConst(); diff --git a/src/Functions/space.cpp b/src/Functions/space.cpp index cd6ca73c088..cf1634e0319 100644 --- a/src/Functions/space.cpp +++ b/src/Functions/space.cpp @@ -55,7 +55,7 @@ public: template - bool executeConstant(ColumnPtr col_times, ColumnString::Offsets & res_offsets, ColumnString::Chars & res_chars) const + bool executeConstant(ColumnPtr col_times, ColumnString::Offsets & res_offsets, ColumnString::Chars & res_chars, size_t input_rows_count) const { const ColumnConst & col_times_const = checkAndGetColumn(*col_times); @@ -71,12 +71,12 @@ public: checkRepeatTime(times); - res_offsets.resize(col_times->size()); - res_chars.resize(col_times->size() * (times + 1)); + res_offsets.resize(input_rows_count); + res_chars.resize(input_rows_count * (times + 1)); size_t pos = 0; - for (size_t i = 0; i < col_times->size(); ++i) + for (size_t i = 0; i < input_rows_count; ++i) { memset(res_chars.begin() + pos, space, times); pos += times; @@ -92,20 +92,20 @@ public: template - bool executeVector(ColumnPtr col_times_, ColumnString::Offsets & res_offsets, ColumnString::Chars & res_chars) const + bool executeVector(ColumnPtr col_times_, ColumnString::Offsets & res_offsets, ColumnString::Chars & res_chars, size_t input_rows_count) const { auto * col_times = checkAndGetColumn(col_times_.get()); if (!col_times) return false; - res_offsets.resize(col_times->size()); - res_chars.resize(col_times->size() * 10); /// heuristic + res_offsets.resize(input_rows_count); + res_chars.resize(input_rows_count * 10); /// heuristic const PaddedPODArray & times_data = col_times->getData(); size_t pos = 0; - for (size_t i = 0; i < col_times->size(); ++i) + for (size_t i = 0; i < input_rows_count; ++i) { typename DataType::FieldType times = times_data[i]; @@ -132,7 +132,7 @@ public: } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { const auto & col_num = arguments[0].column; @@ -143,26 +143,26 @@ public: if (const ColumnConst * col_num_const = checkAndGetColumn(col_num.get())) { 
- if ((executeConstant(col_num, res_offsets, res_chars)) - || (executeConstant(col_num, res_offsets, res_chars)) - || (executeConstant(col_num, res_offsets, res_chars)) - || (executeConstant(col_num, res_offsets, res_chars)) - || (executeConstant(col_num, res_offsets, res_chars)) - || (executeConstant(col_num, res_offsets, res_chars)) - || (executeConstant(col_num, res_offsets, res_chars)) - || (executeConstant(col_num, res_offsets, res_chars))) + if ((executeConstant(col_num, res_offsets, res_chars, input_rows_count)) + || (executeConstant(col_num, res_offsets, res_chars, input_rows_count)) + || (executeConstant(col_num, res_offsets, res_chars, input_rows_count)) + || (executeConstant(col_num, res_offsets, res_chars, input_rows_count)) + || (executeConstant(col_num, res_offsets, res_chars, input_rows_count)) + || (executeConstant(col_num, res_offsets, res_chars, input_rows_count)) + || (executeConstant(col_num, res_offsets, res_chars, input_rows_count)) + || (executeConstant(col_num, res_offsets, res_chars, input_rows_count))) return col_res; } else { - if ((executeVector(col_num, res_offsets, res_chars)) - || (executeVector(col_num, res_offsets, res_chars)) - || (executeVector(col_num, res_offsets, res_chars)) - || (executeVector(col_num, res_offsets, res_chars)) - || (executeVector(col_num, res_offsets, res_chars)) - || (executeVector(col_num, res_offsets, res_chars)) - || (executeVector(col_num, res_offsets, res_chars)) - || (executeVector(col_num, res_offsets, res_chars))) + if ((executeVector(col_num, res_offsets, res_chars, input_rows_count)) + || (executeVector(col_num, res_offsets, res_chars, input_rows_count)) + || (executeVector(col_num, res_offsets, res_chars, input_rows_count)) + || (executeVector(col_num, res_offsets, res_chars, input_rows_count)) + || (executeVector(col_num, res_offsets, res_chars, input_rows_count)) + || (executeVector(col_num, res_offsets, res_chars, input_rows_count)) + || (executeVector(col_num, res_offsets, res_chars, input_rows_count)) + || (executeVector(col_num, res_offsets, res_chars, input_rows_count))) return col_res; } diff --git a/src/Functions/stem.cpp b/src/Functions/stem.cpp index 5b845cf332b..b3be40f4022 100644 --- a/src/Functions/stem.cpp +++ b/src/Functions/stem.cpp @@ -32,7 +32,8 @@ struct StemImpl const ColumnString::Offsets & offsets, ColumnString::Chars & res_data, ColumnString::Offsets & res_offsets, - const String & language) + const String & language, + size_t input_rows_count) { sb_stemmer * stemmer = sb_stemmer_new(language.data(), "UTF_8"); @@ -45,7 +46,7 @@ struct StemImpl res_offsets.assign(offsets); UInt64 data_size = 0; - for (UInt64 i = 0; i < offsets.size(); ++i) + for (UInt64 i = 0; i < input_rows_count; ++i) { /// Note that accessing -1th element is valid for PaddedPODArray. 
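The hunks above (seriesDecomposeSTL.cpp, space.cpp, stem.cpp) all apply the same mechanical change: executeImpl forwards its input_rows_count argument into the per-type helpers, and the helpers size their results and run their row loops over that explicit count instead of re-deriving it from an input column or offsets array. A minimal, self-contained sketch of the convention, using toy names (executeConstantToy) and plain STL containers rather than the real ClickHouse column classes:

    #include <cstddef>
    #include <string>
    #include <vector>

    // Toy analogue of space.cpp's executeConstant after the change: the row count
    // is an explicit parameter supplied by the caller, not col_times->size().
    static void executeConstantToy(size_t times, std::vector<std::string> & res, size_t input_rows_count)
    {
        res.reserve(input_rows_count);
        for (size_t i = 0; i < input_rows_count; ++i)
            res.emplace_back(times, ' ');   // each row is `times` spaces
    }

    int main()
    {
        std::vector<std::string> res;
        executeConstantToy(3, res, 4);      // 4 rows of "   "
        return res.size() == 4 ? 0 : 1;
    }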
size_t original_size = offsets[i] - offsets[i - 1]; @@ -101,7 +102,7 @@ public: ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {0}; } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { const auto & langcolumn = arguments[0].column; const auto & strcolumn = arguments[1].column; @@ -119,7 +120,7 @@ public: String language = lang_col->getValue(); auto col_res = ColumnString::create(); - StemImpl::vector(words_col->getChars(), words_col->getOffsets(), col_res->getChars(), col_res->getOffsets(), language); + StemImpl::vector(words_col->getChars(), words_col->getOffsets(), col_res->getChars(), col_res->getOffsets(), language, input_rows_count); return col_res; } }; diff --git a/src/Functions/stringCutToZero.cpp b/src/Functions/stringCutToZero.cpp index b9f742cd8bc..16e57d741fa 100644 --- a/src/Functions/stringCutToZero.cpp +++ b/src/Functions/stringCutToZero.cpp @@ -40,7 +40,7 @@ public: bool useDefaultImplementationForConstants() const override { return true; } - static bool tryExecuteString(const IColumn * col, ColumnPtr & col_res) + static bool tryExecuteString(const IColumn * col, ColumnPtr & col_res, size_t input_rows_count) { const ColumnString * col_str_in = checkAndGetColumn(col); @@ -53,8 +53,7 @@ public: const ColumnString::Chars & in_vec = col_str_in->getChars(); const ColumnString::Offsets & in_offsets = col_str_in->getOffsets(); - size_t size = in_offsets.size(); - out_offsets.resize(size); + out_offsets.resize(input_rows_count); out_vec.resize(in_vec.size()); char * begin = reinterpret_cast(out_vec.data()); @@ -62,7 +61,7 @@ public: ColumnString::Offset current_in_offset = 0; - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { const char * pos_in = reinterpret_cast(&in_vec[current_in_offset]); size_t current_size = strlen(pos_in); @@ -87,7 +86,7 @@ public: } } - static bool tryExecuteFixedString(const IColumn * col, ColumnPtr & col_res) + static bool tryExecuteFixedString(const IColumn * col, ColumnPtr & col_res, size_t input_rows_count) { const ColumnFixedString * col_fstr_in = checkAndGetColumn(col); @@ -99,10 +98,8 @@ public: const ColumnString::Chars & in_vec = col_fstr_in->getChars(); - size_t size = col_fstr_in->size(); - - out_offsets.resize(size); - out_vec.resize(in_vec.size() + size); + out_offsets.resize(input_rows_count); + out_vec.resize(in_vec.size() + input_rows_count); char * begin = reinterpret_cast(out_vec.data()); char * pos = begin; @@ -110,7 +107,7 @@ public: size_t n = col_fstr_in->getN(); - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { size_t current_size = strnlen(pos_in, n); memcpySmallAllowReadWriteOverflow15(pos, pos_in, current_size); @@ -133,12 +130,12 @@ public: } } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { const IColumn * column = arguments[0].column.get(); ColumnPtr res_column; - if (tryExecuteFixedString(column, res_column) || tryExecuteString(column, res_column)) + if (tryExecuteFixedString(column, res_column, input_rows_count) || tryExecuteString(column, res_column, input_rows_count)) return res_column; throw Exception(ErrorCodes::ILLEGAL_COLUMN, 
"Illegal column {} of argument of function {}", diff --git a/src/Functions/substringIndex.cpp b/src/Functions/substringIndex.cpp index eccd849059b..dc12ae193ff 100644 --- a/src/Functions/substringIndex.cpp +++ b/src/Functions/substringIndex.cpp @@ -68,7 +68,7 @@ namespace return std::make_shared(); } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { ColumnPtr column_string = arguments[0].column; ColumnPtr column_delim = arguments[1].column; @@ -110,10 +110,10 @@ namespace if (is_count_const) { Int64 count = column_count->getInt(0); - vectorConstant(col_str, delim, count, vec_res, offsets_res); + vectorConstant(col_str, delim, count, vec_res, offsets_res, input_rows_count); } else - vectorVector(col_str, delim, column_count.get(), vec_res, offsets_res); + vectorVector(col_str, delim, column_count.get(), vec_res, offsets_res, input_rows_count); } return column_res; } @@ -124,18 +124,18 @@ namespace const String & delim, const IColumn * count_column, ColumnString::Chars & res_data, - ColumnString::Offsets & res_offsets) + ColumnString::Offsets & res_offsets, + size_t input_rows_count) { - size_t rows = str_column->size(); res_data.reserve(str_column->getChars().size() / 2); - res_offsets.reserve(rows); + res_offsets.reserve(input_rows_count); bool all_ascii = isAllASCII(str_column->getChars().data(), str_column->getChars().size()) && isAllASCII(reinterpret_cast(delim.data()), delim.size()); std::unique_ptr searcher = !is_utf8 || all_ascii ? nullptr : std::make_unique(delim.data(), delim.size()); - for (size_t i = 0; i < rows; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { StringRef str_ref = str_column->getDataAt(i); Int64 count = count_column->getInt(i); @@ -157,18 +157,18 @@ namespace const String & delim, Int64 count, ColumnString::Chars & res_data, - ColumnString::Offsets & res_offsets) + ColumnString::Offsets & res_offsets, + size_t input_rows_count) { - size_t rows = str_column->size(); res_data.reserve(str_column->getChars().size() / 2); - res_offsets.reserve(rows); + res_offsets.reserve(input_rows_count); bool all_ascii = isAllASCII(str_column->getChars().data(), str_column->getChars().size()) && isAllASCII(reinterpret_cast(delim.data()), delim.size()); std::unique_ptr searcher = !is_utf8 || all_ascii ? 
nullptr : std::make_unique(delim.data(), delim.size()); - for (size_t i = 0; i < rows; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { StringRef str_ref = str_column->getDataAt(i); diff --git a/src/Functions/subtractNanoseconds.cpp b/src/Functions/subtractNanoseconds.cpp index fffb4eae37a..360c5ecd9cb 100644 --- a/src/Functions/subtractNanoseconds.cpp +++ b/src/Functions/subtractNanoseconds.cpp @@ -6,6 +6,7 @@ namespace DB { using FunctionSubtractNanoseconds = FunctionDateOrDateTimeAddInterval; + REGISTER_FUNCTION(SubtractNanoseconds) { factory.registerFunction(); diff --git a/src/Functions/throwIf.cpp b/src/Functions/throwIf.cpp index becc6d2f772..e317c65c622 100644 --- a/src/Functions/throwIf.cpp +++ b/src/Functions/throwIf.cpp @@ -152,7 +152,7 @@ private: return nullptr; } - bool allow_custom_error_code_argument; + const bool allow_custom_error_code_argument; }; } diff --git a/src/Functions/timeSlots.cpp b/src/Functions/timeSlots.cpp index 040495ab023..b62bb20c64e 100644 --- a/src/Functions/timeSlots.cpp +++ b/src/Functions/timeSlots.cpp @@ -41,18 +41,17 @@ struct TimeSlotsImpl /// The following three methods process DateTime type static void vectorVector( const PaddedPODArray & starts, const PaddedPODArray & durations, UInt32 time_slot_size, - PaddedPODArray & result_values, ColumnArray::Offsets & result_offsets) + PaddedPODArray & result_values, ColumnArray::Offsets & result_offsets, + size_t input_rows_count) { if (time_slot_size == 0) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Time slot size cannot be zero"); - size_t size = starts.size(); - - result_offsets.resize(size); - result_values.reserve(size); + result_offsets.resize(input_rows_count); + result_values.reserve(input_rows_count); ColumnArray::Offset current_offset = 0; - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { for (UInt32 value = starts[i] / time_slot_size, end = (starts[i] + durations[i]) / time_slot_size; value <= end; ++value) { @@ -66,18 +65,17 @@ struct TimeSlotsImpl static void vectorConstant( const PaddedPODArray & starts, UInt32 duration, UInt32 time_slot_size, - PaddedPODArray & result_values, ColumnArray::Offsets & result_offsets) + PaddedPODArray & result_values, ColumnArray::Offsets & result_offsets, + size_t input_rows_count) { if (time_slot_size == 0) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Time slot size cannot be zero"); - size_t size = starts.size(); - - result_offsets.resize(size); - result_values.reserve(size); + result_offsets.resize(input_rows_count); + result_values.reserve(input_rows_count); ColumnArray::Offset current_offset = 0; - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { for (UInt32 value = starts[i] / time_slot_size, end = (starts[i] + duration) / time_slot_size; value <= end; ++value) { @@ -91,18 +89,17 @@ struct TimeSlotsImpl static void constantVector( UInt32 start, const PaddedPODArray & durations, UInt32 time_slot_size, - PaddedPODArray & result_values, ColumnArray::Offsets & result_offsets) + PaddedPODArray & result_values, ColumnArray::Offsets & result_offsets, + size_t input_rows_count) { if (time_slot_size == 0) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Time slot size cannot be zero"); - size_t size = durations.size(); - - result_offsets.resize(size); - result_values.reserve(size); + result_offsets.resize(input_rows_count); + result_values.reserve(input_rows_count); ColumnArray::Offset current_offset = 0; - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { 
for (UInt32 value = start / time_slot_size, end = (start + durations[i]) / time_slot_size; value <= end; ++value) { @@ -120,12 +117,11 @@ struct TimeSlotsImpl */ static NO_SANITIZE_UNDEFINED void vectorVector( const PaddedPODArray & starts, const PaddedPODArray & durations, Decimal64 time_slot_size, - PaddedPODArray & result_values, ColumnArray::Offsets & result_offsets, UInt16 dt_scale, UInt16 duration_scale, UInt16 time_slot_scale) + PaddedPODArray & result_values, ColumnArray::Offsets & result_offsets, UInt16 dt_scale, UInt16 duration_scale, UInt16 time_slot_scale, + size_t input_rows_count) { - size_t size = starts.size(); - - result_offsets.resize(size); - result_values.reserve(size); + result_offsets.resize(input_rows_count); + result_values.reserve(input_rows_count); /// Modify all units to have same scale UInt16 max_scale = std::max({dt_scale, duration_scale, time_slot_scale}); @@ -139,7 +135,7 @@ struct TimeSlotsImpl if (time_slot_size == 0) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Time slot size cannot be zero"); - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { for (DateTime64 value = (starts[i] * dt_multiplier) / time_slot_size, end = (starts[i] * dt_multiplier + durations[i] * dur_multiplier) / time_slot_size; value <= end; value += 1) { @@ -152,12 +148,11 @@ struct TimeSlotsImpl static NO_SANITIZE_UNDEFINED void vectorConstant( const PaddedPODArray & starts, Decimal64 duration, Decimal64 time_slot_size, - PaddedPODArray & result_values, ColumnArray::Offsets & result_offsets, UInt16 dt_scale, UInt16 duration_scale, UInt16 time_slot_scale) + PaddedPODArray & result_values, ColumnArray::Offsets & result_offsets, UInt16 dt_scale, UInt16 duration_scale, UInt16 time_slot_scale, + size_t input_rows_count) { - size_t size = starts.size(); - - result_offsets.resize(size); - result_values.reserve(size); + result_offsets.resize(input_rows_count); + result_values.reserve(input_rows_count); /// Modify all units to have same scale UInt16 max_scale = std::max({dt_scale, duration_scale, time_slot_scale}); @@ -172,7 +167,7 @@ struct TimeSlotsImpl if (time_slot_size == 0) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Time slot size cannot be zero"); - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { for (DateTime64 value = (starts[i] * dt_multiplier) / time_slot_size, end = (starts[i] * dt_multiplier + duration) / time_slot_size; value <= end; value += 1) { @@ -185,12 +180,11 @@ struct TimeSlotsImpl static NO_SANITIZE_UNDEFINED void constantVector( DateTime64 start, const PaddedPODArray & durations, Decimal64 time_slot_size, - PaddedPODArray & result_values, ColumnArray::Offsets & result_offsets, UInt16 dt_scale, UInt16 duration_scale, UInt16 time_slot_scale) + PaddedPODArray & result_values, ColumnArray::Offsets & result_offsets, UInt16 dt_scale, UInt16 duration_scale, UInt16 time_slot_scale, + size_t input_rows_count) { - size_t size = durations.size(); - - result_offsets.resize(size); - result_values.reserve(size); + result_offsets.resize(input_rows_count); + result_values.reserve(input_rows_count); /// Modify all units to have same scale UInt16 max_scale = std::max({dt_scale, duration_scale, time_slot_scale}); @@ -205,7 +199,7 @@ struct TimeSlotsImpl if (time_slot_size == 0) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Time slot size cannot be zero"); - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { for (DateTime64 value = start / time_slot_size, end = (start + durations[i] * 
dur_multiplier) / time_slot_size; value <= end; value += 1) { @@ -282,7 +276,7 @@ public: } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { if (WhichDataType(arguments[0].type).isDateTime()) { @@ -308,17 +302,17 @@ public: if (dt_starts && durations) { - TimeSlotsImpl::vectorVector(dt_starts->getData(), durations->getData(), time_slot_size, res_values, res->getOffsets()); + TimeSlotsImpl::vectorVector(dt_starts->getData(), durations->getData(), time_slot_size, res_values, res->getOffsets(), input_rows_count); return res; } else if (dt_starts && const_durations) { - TimeSlotsImpl::vectorConstant(dt_starts->getData(), const_durations->getValue(), time_slot_size, res_values, res->getOffsets()); + TimeSlotsImpl::vectorConstant(dt_starts->getData(), const_durations->getValue(), time_slot_size, res_values, res->getOffsets(), input_rows_count); return res; } else if (dt_const_starts && durations) { - TimeSlotsImpl::constantVector(dt_const_starts->getValue(), durations->getData(), time_slot_size, res_values, res->getOffsets()); + TimeSlotsImpl::constantVector(dt_const_starts->getValue(), durations->getData(), time_slot_size, res_values, res->getOffsets(), input_rows_count); return res; } } @@ -353,21 +347,21 @@ public: if (starts && durations) { TimeSlotsImpl::vectorVector(starts->getData(), durations->getData(), time_slot_size, res_values, res->getOffsets(), - start_time_scale, duration_scale, time_slot_scale); + start_time_scale, duration_scale, time_slot_scale, input_rows_count); return res; } else if (starts && const_durations) { TimeSlotsImpl::vectorConstant( starts->getData(), const_durations->getValue(), time_slot_size, res_values, res->getOffsets(), - start_time_scale, duration_scale, time_slot_scale); + start_time_scale, duration_scale, time_slot_scale, input_rows_count); return res; } else if (const_starts && durations) { TimeSlotsImpl::constantVector( const_starts->getValue(), durations->getData(), time_slot_size, res_values, res->getOffsets(), - start_time_scale, duration_scale, time_slot_scale); + start_time_scale, duration_scale, time_slot_scale, input_rows_count); return res; } } diff --git a/src/Functions/toDecimalString.cpp b/src/Functions/toDecimalString.cpp index 523948a5396..3566ebc93ad 100644 --- a/src/Functions/toDecimalString.cpp +++ b/src/Functions/toDecimalString.cpp @@ -54,9 +54,9 @@ private: /// For operations with Integer/Float template void vectorConstant(const FromVectorType & vec_from, UInt8 precision, - ColumnString::Chars & vec_to, ColumnString::Offsets & result_offsets) const + ColumnString::Chars & vec_to, ColumnString::Offsets & result_offsets, + size_t input_rows_count) const { - size_t input_rows_count = vec_from.size(); result_offsets.resize(input_rows_count); /// Buffer is used here and in functions below because resulting size cannot be precisely anticipated, @@ -74,9 +74,9 @@ private: template void vectorVector(const FirstArgVectorType & vec_from, const ColumnVector::Container & vec_precision, - ColumnString::Chars & vec_to, ColumnString::Offsets & result_offsets) const + ColumnString::Chars & vec_to, ColumnString::Offsets & result_offsets, + size_t input_rows_count) const { - size_t input_rows_count = vec_from.size(); result_offsets.resize(input_rows_count); WriteBufferFromVector buf_to(vec_to); @@ -98,7 +98,8 @@ private: /// For operations with Decimal template void 
vectorConstant(const FirstArgVectorType & vec_from, UInt8 precision, - ColumnString::Chars & vec_to, ColumnString::Offsets & result_offsets, UInt8 from_scale) const + ColumnString::Chars & vec_to, ColumnString::Offsets & result_offsets, UInt8 from_scale, + size_t input_rows_count) const { /// There are no more than 77 meaning digits (as it is the max length of UInt256). So we can limit it with 77. constexpr size_t max_digits = std::numeric_limits::digits10; @@ -107,7 +108,6 @@ private: "Too many fractional digits requested for Decimal, must not be more than {}", max_digits); WriteBufferFromVector buf_to(vec_to); - size_t input_rows_count = vec_from.size(); result_offsets.resize(input_rows_count); for (size_t i = 0; i < input_rows_count; ++i) @@ -121,9 +121,9 @@ private: template void vectorVector(const FirstArgVectorType & vec_from, const ColumnVector::Container & vec_precision, - ColumnString::Chars & vec_to, ColumnString::Offsets & result_offsets, UInt8 from_scale) const + ColumnString::Chars & vec_to, ColumnString::Offsets & result_offsets, UInt8 from_scale, + size_t input_rows_count) const { - size_t input_rows_count = vec_from.size(); result_offsets.resize(input_rows_count); WriteBufferFromVector buf_to(vec_to); @@ -182,28 +182,28 @@ private: } public: - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { switch (arguments[0].type->getTypeId()) { - case TypeIndex::UInt8: return executeType(arguments); - case TypeIndex::UInt16: return executeType(arguments); - case TypeIndex::UInt32: return executeType(arguments); - case TypeIndex::UInt64: return executeType(arguments); - case TypeIndex::UInt128: return executeType(arguments); - case TypeIndex::UInt256: return executeType(arguments); - case TypeIndex::Int8: return executeType(arguments); - case TypeIndex::Int16: return executeType(arguments); - case TypeIndex::Int32: return executeType(arguments); - case TypeIndex::Int64: return executeType(arguments); - case TypeIndex::Int128: return executeType(arguments); - case TypeIndex::Int256: return executeType(arguments); - case TypeIndex::Float32: return executeType(arguments); - case TypeIndex::Float64: return executeType(arguments); - case TypeIndex::Decimal32: return executeType(arguments); - case TypeIndex::Decimal64: return executeType(arguments); - case TypeIndex::Decimal128: return executeType(arguments); - case TypeIndex::Decimal256: return executeType(arguments); + case TypeIndex::UInt8: return executeType(arguments, input_rows_count); + case TypeIndex::UInt16: return executeType(arguments, input_rows_count); + case TypeIndex::UInt32: return executeType(arguments, input_rows_count); + case TypeIndex::UInt64: return executeType(arguments, input_rows_count); + case TypeIndex::UInt128: return executeType(arguments, input_rows_count); + case TypeIndex::UInt256: return executeType(arguments, input_rows_count); + case TypeIndex::Int8: return executeType(arguments, input_rows_count); + case TypeIndex::Int16: return executeType(arguments, input_rows_count); + case TypeIndex::Int32: return executeType(arguments, input_rows_count); + case TypeIndex::Int64: return executeType(arguments, input_rows_count); + case TypeIndex::Int128: return executeType(arguments, input_rows_count); + case TypeIndex::Int256: return executeType(arguments, input_rows_count); + case TypeIndex::Float32: return 
executeType(arguments, input_rows_count); + case TypeIndex::Float64: return executeType(arguments, input_rows_count); + case TypeIndex::Decimal32: return executeType(arguments, input_rows_count); + case TypeIndex::Decimal64: return executeType(arguments, input_rows_count); + case TypeIndex::Decimal128: return executeType(arguments, input_rows_count); + case TypeIndex::Decimal256: return executeType(arguments, input_rows_count); default: throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of argument of function {}", arguments[0].column->getName(), getName()); @@ -212,7 +212,7 @@ public: private: template - ColumnPtr executeType(const ColumnsWithTypeAndName & arguments) const + ColumnPtr executeType(const ColumnsWithTypeAndName & arguments, size_t input_rows_count) const { const auto * precision_col = checkAndGetColumn>(arguments[1].column.get()); const auto * precision_col_const = checkAndGetColumnConst>(arguments[1].column.get()); @@ -230,9 +230,9 @@ private: { UInt8 from_scale = from_col->getScale(); if (precision_col_const) - vectorConstant(from_col->getData(), precision_col_const->template getValue(), result_chars, result_offsets, from_scale); + vectorConstant(from_col->getData(), precision_col_const->template getValue(), result_chars, result_offsets, from_scale, input_rows_count); else if (precision_col) - vectorVector(from_col->getData(), precision_col->getData(), result_chars, result_offsets, from_scale); + vectorVector(from_col->getData(), precision_col->getData(), result_chars, result_offsets, from_scale, input_rows_count); else throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of second argument of function formatDecimal", arguments[1].column->getName()); } @@ -245,9 +245,9 @@ private: if (from_col) { if (precision_col_const) - vectorConstant(from_col->getData(), precision_col_const->template getValue(), result_chars, result_offsets); + vectorConstant(from_col->getData(), precision_col_const->template getValue(), result_chars, result_offsets, input_rows_count); else if (precision_col) - vectorVector(from_col->getData(), precision_col->getData(), result_chars, result_offsets); + vectorVector(from_col->getData(), precision_col->getData(), result_chars, result_offsets, input_rows_count); else throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of second argument of function formatDecimal", arguments[1].column->getName()); diff --git a/src/Functions/toStartOfInterval.cpp b/src/Functions/toStartOfInterval.cpp index 50442d1b448..21b7cf895d2 100644 --- a/src/Functions/toStartOfInterval.cpp +++ b/src/Functions/toStartOfInterval.cpp @@ -147,19 +147,20 @@ public: std::unreachable(); } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t /* input_rows_count */) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override { const auto & time_column = arguments[0]; const auto & interval_column = arguments[1]; const auto & time_zone = extractTimeZoneFromFunctionArguments(arguments, 2, 0); - auto result_column = dispatchForTimeColumn(time_column, interval_column, result_type, time_zone); + auto result_column = dispatchForTimeColumn(time_column, interval_column, result_type, time_zone, input_rows_count); return result_column; } private: ColumnPtr dispatchForTimeColumn( const ColumnWithTypeAndName & time_column, const ColumnWithTypeAndName & interval_column, - const DataTypePtr & result_type, const DateLUTImpl & 
time_zone) const + const DataTypePtr & result_type, const DateLUTImpl & time_zone, + size_t input_rows_count) const { const auto & time_column_type = *time_column.type.get(); const auto & time_column_col = *time_column.column.get(); @@ -170,19 +171,19 @@ private: auto scale = assert_cast(time_column_type).getScale(); if (time_column_vec) - return dispatchForIntervalColumn(assert_cast(time_column_type), *time_column_vec, interval_column, result_type, time_zone, scale); + return dispatchForIntervalColumn(assert_cast(time_column_type), *time_column_vec, interval_column, result_type, time_zone, input_rows_count, scale); } else if (isDateTime(time_column_type)) { const auto * time_column_vec = checkAndGetColumn(&time_column_col); if (time_column_vec) - return dispatchForIntervalColumn(assert_cast(time_column_type), *time_column_vec, interval_column, result_type, time_zone); + return dispatchForIntervalColumn(assert_cast(time_column_type), *time_column_vec, interval_column, result_type, time_zone, input_rows_count); } else if (isDate(time_column_type)) { const auto * time_column_vec = checkAndGetColumn(&time_column_col); if (time_column_vec) - return dispatchForIntervalColumn(assert_cast(time_column_type), *time_column_vec, interval_column, result_type, time_zone); + return dispatchForIntervalColumn(assert_cast(time_column_type), *time_column_vec, interval_column, result_type, time_zone, input_rows_count); } throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal column for 1st argument of function {}, expected a Date, DateTime or DateTime64", getName()); } @@ -190,7 +191,7 @@ private: template ColumnPtr dispatchForIntervalColumn( const TimeDataType & time_data_type, const TimeColumnType & time_column, const ColumnWithTypeAndName & interval_column, - const DataTypePtr & result_type, const DateLUTImpl & time_zone, UInt16 scale = 1) const + const DataTypePtr & result_type, const DateLUTImpl & time_zone, size_t input_rows_count, UInt16 scale = 1) const { const auto * interval_type = checkAndGetDataType(interval_column.type.get()); if (!interval_type) @@ -207,27 +208,27 @@ private: switch (interval_type->getKind()) // NOLINT(bugprone-switch-missing-default-case) { case IntervalKind::Kind::Nanosecond: - return execute(time_data_type, time_column, num_units, result_type, time_zone, scale); + return execute(time_data_type, time_column, num_units, result_type, time_zone, input_rows_count, scale); case IntervalKind::Kind::Microsecond: - return execute(time_data_type, time_column, num_units, result_type, time_zone, scale); + return execute(time_data_type, time_column, num_units, result_type, time_zone, input_rows_count, scale); case IntervalKind::Kind::Millisecond: - return execute(time_data_type, time_column, num_units, result_type, time_zone, scale); + return execute(time_data_type, time_column, num_units, result_type, time_zone, input_rows_count, scale); case IntervalKind::Kind::Second: - return execute(time_data_type, time_column, num_units, result_type, time_zone, scale); + return execute(time_data_type, time_column, num_units, result_type, time_zone, input_rows_count, scale); case IntervalKind::Kind::Minute: - return execute(time_data_type, time_column, num_units, result_type, time_zone, scale); + return execute(time_data_type, time_column, num_units, result_type, time_zone, input_rows_count, scale); case IntervalKind::Kind::Hour: - return execute(time_data_type, time_column, num_units, result_type, time_zone, scale); + return execute(time_data_type, time_column, num_units, result_type, 
time_zone, input_rows_count, scale); case IntervalKind::Kind::Day: - return execute(time_data_type, time_column, num_units, result_type, time_zone, scale); + return execute(time_data_type, time_column, num_units, result_type, time_zone, input_rows_count, scale); case IntervalKind::Kind::Week: - return execute(time_data_type, time_column, num_units, result_type, time_zone, scale); + return execute(time_data_type, time_column, num_units, result_type, time_zone, input_rows_count, scale); case IntervalKind::Kind::Month: - return execute(time_data_type, time_column, num_units, result_type, time_zone, scale); + return execute(time_data_type, time_column, num_units, result_type, time_zone, input_rows_count, scale); case IntervalKind::Kind::Quarter: - return execute(time_data_type, time_column, num_units, result_type, time_zone, scale); + return execute(time_data_type, time_column, num_units, result_type, time_zone, input_rows_count, scale); case IntervalKind::Kind::Year: - return execute(time_data_type, time_column, num_units, result_type, time_zone, scale); + return execute(time_data_type, time_column, num_units, result_type, time_zone, input_rows_count, scale); } std::unreachable(); @@ -236,22 +237,21 @@ private: template ColumnPtr execute( const TimeDataType &, const TimeColumnType & time_column_type, Int64 num_units, - const DataTypePtr & result_type, const DateLUTImpl & time_zone, UInt16 scale) const + const DataTypePtr & result_type, const DateLUTImpl & time_zone, size_t input_rows_count, UInt16 scale) const { using ResultColumnType = typename ResultDataType::ColumnType; using ResultFieldType = typename ResultDataType::FieldType; const auto & time_data = time_column_type.getData(); - size_t size = time_data.size(); auto result_col = result_type->createColumn(); auto * col_to = assert_cast(result_col.get()); auto & result_data = col_to->getData(); - result_data.resize(size); + result_data.resize(input_rows_count); Int64 scale_multiplier = DecimalUtils::scaleMultiplier(scale); - for (size_t i = 0; i != size; ++i) + for (size_t i = 0; i != input_rows_count; ++i) result_data[i] = static_cast(ToStartOfInterval::execute(time_data[i], num_units, time_zone, scale_multiplier)); return result_col; diff --git a/src/Functions/tokenExtractors.cpp b/src/Functions/tokenExtractors.cpp index e7dcb5cced3..1bbf313fbae 100644 --- a/src/Functions/tokenExtractors.cpp +++ b/src/Functions/tokenExtractors.cpp @@ -73,7 +73,7 @@ public: return std::make_shared(std::make_shared()); } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { auto column_offsets = ColumnArray::ColumnOffsets::create(); @@ -90,9 +90,9 @@ public: auto input_column = arguments[0].column; if (const auto * column_string = checkAndGetColumn(input_column.get())) - executeImpl(extractor, *column_string, *result_column_string, *column_offsets); + executeImpl(extractor, *column_string, *result_column_string, *column_offsets, input_rows_count); else if (const auto * column_fixed_string = checkAndGetColumn(input_column.get())) - executeImpl(extractor, *column_fixed_string, *result_column_string, *column_offsets); + executeImpl(extractor, *column_fixed_string, *result_column_string, *column_offsets, input_rows_count); return ColumnArray::create(std::move(result_column_string), std::move(column_offsets)); } @@ -105,9 +105,9 @@ public: auto input_column = 
arguments[0].column; if (const auto * column_string = checkAndGetColumn(input_column.get())) - executeImpl(extractor, *column_string, *result_column_string, *column_offsets); + executeImpl(extractor, *column_string, *result_column_string, *column_offsets, input_rows_count); else if (const auto * column_fixed_string = checkAndGetColumn(input_column.get())) - executeImpl(extractor, *column_fixed_string, *result_column_string, *column_offsets); + executeImpl(extractor, *column_fixed_string, *result_column_string, *column_offsets, input_rows_count); return ColumnArray::create(std::move(result_column_string), std::move(column_offsets)); } @@ -120,15 +120,15 @@ private: const ExtractorType & extractor, StringColumnType & input_data_column, ResultStringColumnType & result_data_column, - ColumnArray::ColumnOffsets & offsets_column) const + ColumnArray::ColumnOffsets & offsets_column, + size_t input_rows_count) const { size_t current_tokens_size = 0; auto & offsets_data = offsets_column.getData(); - size_t column_size = input_data_column.size(); - offsets_data.resize(column_size); + offsets_data.resize(input_rows_count); - for (size_t i = 0; i < column_size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { auto data = input_data_column.getDataAt(i); diff --git a/src/Functions/transform.cpp b/src/Functions/transform.cpp index 68500779f93..0dfc9197845 100644 --- a/src/Functions/transform.cpp +++ b/src/Functions/transform.cpp @@ -173,30 +173,30 @@ namespace } else if (cache.table_num_to_idx) { - if (!executeNum>(in, *column_result, default_non_const, *in_casted) - && !executeNum>(in, *column_result, default_non_const, *in_casted) - && !executeNum>(in, *column_result, default_non_const, *in_casted) - && !executeNum>(in, *column_result, default_non_const, *in_casted) - && !executeNum>(in, *column_result, default_non_const, *in_casted) - && !executeNum>(in, *column_result, default_non_const, *in_casted) - && !executeNum>(in, *column_result, default_non_const, *in_casted) - && !executeNum>(in, *column_result, default_non_const, *in_casted) - && !executeNum>(in, *column_result, default_non_const, *in_casted) - && !executeNum>(in, *column_result, default_non_const, *in_casted) - && !executeNum>(in, *column_result, default_non_const, *in_casted) - && !executeNum>(in, *column_result, default_non_const, *in_casted)) + if (!executeNum>(in, *column_result, default_non_const, *in_casted, input_rows_count) + && !executeNum>(in, *column_result, default_non_const, *in_casted, input_rows_count) + && !executeNum>(in, *column_result, default_non_const, *in_casted, input_rows_count) + && !executeNum>(in, *column_result, default_non_const, *in_casted, input_rows_count) + && !executeNum>(in, *column_result, default_non_const, *in_casted, input_rows_count) + && !executeNum>(in, *column_result, default_non_const, *in_casted, input_rows_count) + && !executeNum>(in, *column_result, default_non_const, *in_casted, input_rows_count) + && !executeNum>(in, *column_result, default_non_const, *in_casted, input_rows_count) + && !executeNum>(in, *column_result, default_non_const, *in_casted, input_rows_count) + && !executeNum>(in, *column_result, default_non_const, *in_casted, input_rows_count) + && !executeNum>(in, *column_result, default_non_const, *in_casted, input_rows_count) + && !executeNum>(in, *column_result, default_non_const, *in_casted, input_rows_count)) { throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", in->getName(), getName()); } } else if 
(cache.table_string_to_idx) { - if (!executeString(in, *column_result, default_non_const, *in_casted)) - executeContiguous(in, *column_result, default_non_const, *in_casted); + if (!executeString(in, *column_result, default_non_const, *in_casted, input_rows_count)) + executeContiguous(in, *column_result, default_non_const, *in_casted, input_rows_count); } else if (cache.table_anything_to_idx) { - executeAnything(in, *column_result, default_non_const, *in_casted); + executeAnything(in, *column_result, default_non_const, *in_casted, input_rows_count); } else throw Exception(ErrorCodes::LOGICAL_ERROR, "State of the function `transform` is not initialized"); @@ -217,12 +217,11 @@ namespace return impl->execute(args, result_type, input_rows_count); } - void executeAnything(const IColumn * in, IColumn & column_result, const ColumnPtr default_non_const, const IColumn & in_casted) const + void executeAnything(const IColumn * in, IColumn & column_result, const ColumnPtr default_non_const, const IColumn & in_casted, size_t input_rows_count) const { - const size_t size = in->size(); const auto & table = *cache.table_anything_to_idx; - column_result.reserve(size); - for (size_t i = 0; i < size; ++i) + column_result.reserve(input_rows_count); + for (size_t i = 0; i < input_rows_count; ++i) { SipHash hash; in->updateHashWithValue(i, hash); @@ -239,12 +238,11 @@ namespace } } - void executeContiguous(const IColumn * in, IColumn & column_result, const ColumnPtr default_non_const, const IColumn & in_casted) const + void executeContiguous(const IColumn * in, IColumn & column_result, const ColumnPtr default_non_const, const IColumn & in_casted, size_t input_rows_count) const { - const size_t size = in->size(); const auto & table = *cache.table_string_to_idx; - column_result.reserve(size); - for (size_t i = 0; i < size; ++i) + column_result.reserve(input_rows_count); + for (size_t i = 0; i < input_rows_count; ++i) { const auto * it = table.find(in->getDataAt(i)); if (it) @@ -259,7 +257,7 @@ namespace } template - bool executeNum(const IColumn * in_untyped, IColumn & column_result, const ColumnPtr default_non_const, const IColumn & in_casted) const + bool executeNum(const IColumn * in_untyped, IColumn & column_result, const ColumnPtr default_non_const, const IColumn & in_casted, size_t input_rows_count) const { const auto * const in = checkAndGetColumn(in_untyped); if (!in) @@ -269,24 +267,23 @@ namespace if constexpr (std::is_same_v, T> || std::is_same_v, T>) in_scale = in->getScale(); - if (!executeNumToString(pod, column_result, default_non_const) - && !executeNumToNum>(pod, column_result, default_non_const, in_scale) - && !executeNumToNum>(pod, column_result, default_non_const, in_scale) - && !executeNumToNum>(pod, column_result, default_non_const, in_scale) - && !executeNumToNum>(pod, column_result, default_non_const, in_scale) - && !executeNumToNum>(pod, column_result, default_non_const, in_scale) - && !executeNumToNum>(pod, column_result, default_non_const, in_scale) - && !executeNumToNum>(pod, column_result, default_non_const, in_scale) - && !executeNumToNum>(pod, column_result, default_non_const, in_scale) - && !executeNumToNum>(pod, column_result, default_non_const, in_scale) - && !executeNumToNum>(pod, column_result, default_non_const, in_scale) - && !executeNumToNum>(pod, column_result, default_non_const, in_scale) - && !executeNumToNum>(pod, column_result, default_non_const, in_scale)) + if (!executeNumToString(pod, column_result, default_non_const, input_rows_count) + && !executeNumToNum>(pod, 
column_result, default_non_const, in_scale, input_rows_count) + && !executeNumToNum>(pod, column_result, default_non_const, in_scale, input_rows_count) + && !executeNumToNum>(pod, column_result, default_non_const, in_scale, input_rows_count) + && !executeNumToNum>(pod, column_result, default_non_const, in_scale, input_rows_count) + && !executeNumToNum>(pod, column_result, default_non_const, in_scale, input_rows_count) + && !executeNumToNum>(pod, column_result, default_non_const, in_scale, input_rows_count) + && !executeNumToNum>(pod, column_result, default_non_const, in_scale, input_rows_count) + && !executeNumToNum>(pod, column_result, default_non_const, in_scale, input_rows_count) + && !executeNumToNum>(pod, column_result, default_non_const, in_scale, input_rows_count) + && !executeNumToNum>(pod, column_result, default_non_const, in_scale, input_rows_count) + && !executeNumToNum>(pod, column_result, default_non_const, in_scale, input_rows_count) + && !executeNumToNum>(pod, column_result, default_non_const, in_scale, input_rows_count)) { - const size_t size = pod.size(); const auto & table = *cache.table_num_to_idx; - column_result.reserve(size); - for (size_t i = 0; i < size; ++i) + column_result.reserve(input_rows_count); + for (size_t i = 0; i < input_rows_count; ++i) { const auto * it = table.find(bit_cast(pod[i])); if (it) @@ -303,14 +300,13 @@ namespace } template - bool executeNumToString(const PaddedPODArray & pod, IColumn & column_result, const ColumnPtr default_non_const) const + bool executeNumToString(const PaddedPODArray & pod, IColumn & column_result, const ColumnPtr default_non_const, size_t input_rows_count) const { auto * out = typeid_cast(&column_result); if (!out) return false; auto & out_offs = out->getOffsets(); - const size_t size = pod.size(); - out_offs.resize(size); + out_offs.resize(input_rows_count); auto & out_chars = out->getChars(); const auto * to_col = assert_cast(cache.to_column.get()); @@ -325,14 +321,14 @@ namespace const auto & def_offs = def->getOffsets(); const auto * def_data = def_chars.data(); auto def_size = def_offs[0]; - executeNumToStringHelper(table, pod, out_chars, out_offs, to_chars, to_offs, def_data, def_size, size); + executeNumToStringHelper(table, pod, out_chars, out_offs, to_chars, to_offs, def_data, def_size, input_rows_count); } else { const auto * def = assert_cast(default_non_const.get()); const auto & def_chars = def->getChars(); const auto & def_offs = def->getOffsets(); - executeNumToStringHelper(table, pod, out_chars, out_offs, to_chars, to_offs, def_chars, def_offs, size); + executeNumToStringHelper(table, pod, out_chars, out_offs, to_chars, to_offs, def_chars, def_offs, input_rows_count); } return true; } @@ -347,10 +343,10 @@ namespace const ColumnString::Offsets & to_offsets, const DefData & def_data, const DefOffs & def_offsets, - const size_t size) const + size_t input_rows_count) const { size_t out_cur_off = 0; - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { const char8_t * to = nullptr; size_t to_size = 0; @@ -382,14 +378,13 @@ namespace template bool executeNumToNum( - const PaddedPODArray & pod, IColumn & column_result, const ColumnPtr default_non_const, const UInt32 in_scale) const + const PaddedPODArray & pod, IColumn & column_result, ColumnPtr default_non_const, UInt32 in_scale, size_t input_rows_count) const { auto * out = typeid_cast(&column_result); if (!out) return false; auto & out_pod = out->getData(); - const size_t size = pod.size(); - out_pod.resize(size); + 
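In transform.cpp the same convention reaches the lookup helpers: output columns are resized to input_rows_count (the out_offs.resize / out_pod.resize calls above) and every per-row loop iterates over that count. A toy, self-contained analogue of the numeric lookup path, with hypothetical names (transformToy) and plain STL containers standing in for IColumn:

    #include <cstddef>
    #include <unordered_map>
    #include <vector>

    // Map each input value through a lookup table, falling back to a per-row
    // default — roughly the shape of executeNumToNumHelper, sized by the
    // caller-provided row count.
    static void transformToy(const std::vector<int> & in,
                             const std::unordered_map<int, int> & table,
                             const std::vector<int> & def,
                             std::vector<int> & out,
                             size_t input_rows_count)
    {
        out.resize(input_rows_count);
        for (size_t i = 0; i < input_rows_count; ++i)
        {
            auto it = table.find(in[i]);
            out[i] = (it != table.end()) ? it->second : def[i];
        }
    }

    int main()
    {
        std::vector<int> out;
        transformToy({1, 2, 3}, {{2, 20}}, {0, 0, 0}, out, 3);   // -> {0, 20, 0}
        return out[1] == 20 ? 0 : 1;
    }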
out_pod.resize(input_rows_count); UInt32 out_scale = 0; if constexpr (std::is_same_v, T> || std::is_same_v, T>) out_scale = out->getScale(); @@ -399,15 +394,15 @@ namespace if (cache.default_column) { const auto const_def = assert_cast(cache.default_column.get())->getData()[0]; - executeNumToNumHelper(table, pod, out_pod, to_pod, const_def, size, out_scale, out_scale); + executeNumToNumHelper(table, pod, out_pod, to_pod, const_def, input_rows_count, out_scale, out_scale); } else if (default_non_const) { const auto & nconst_def = assert_cast(default_non_const.get())->getData(); - executeNumToNumHelper(table, pod, out_pod, to_pod, nconst_def, size, out_scale, out_scale); + executeNumToNumHelper(table, pod, out_pod, to_pod, nconst_def, input_rows_count, out_scale, out_scale); } else - executeNumToNumHelper(table, pod, out_pod, to_pod, pod, size, out_scale, in_scale); + executeNumToNumHelper(table, pod, out_pod, to_pod, pod, input_rows_count, out_scale, in_scale); return true; } @@ -418,11 +413,11 @@ namespace PaddedPODArray & out_pod, const PaddedPODArray & to_pod, const Def & def, - const size_t size, - const UInt32 out_scale, - const UInt32 def_scale) const + size_t input_rows_count, + UInt32 out_scale, + UInt32 def_scale) const { - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { const auto * it = table.find(bit_cast(pod[i])); if (it) @@ -450,7 +445,7 @@ namespace } } - bool executeString(const IColumn * in_untyped, IColumn & column_result, const ColumnPtr default_non_const, const IColumn & in_casted) const + bool executeString(const IColumn * in_untyped, IColumn & column_result, const ColumnPtr default_non_const, const IColumn & in_casted, size_t input_rows_count) const { const auto * const in = checkAndGetColumn(in_untyped); if (!in) @@ -458,19 +453,19 @@ namespace const auto & data = in->getChars(); const auto & offsets = in->getOffsets(); - if (!executeStringToString(data, offsets, column_result, default_non_const) - && !executeStringToNum>(data, offsets, column_result, default_non_const) - && !executeStringToNum>(data, offsets, column_result, default_non_const) - && !executeStringToNum>(data, offsets, column_result, default_non_const) - && !executeStringToNum>(data, offsets, column_result, default_non_const) - && !executeStringToNum>(data, offsets, column_result, default_non_const) - && !executeStringToNum>(data, offsets, column_result, default_non_const) - && !executeStringToNum>(data, offsets, column_result, default_non_const) - && !executeStringToNum>(data, offsets, column_result, default_non_const) - && !executeStringToNum>(data, offsets, column_result, default_non_const) - && !executeStringToNum>(data, offsets, column_result, default_non_const) - && !executeStringToNum>(data, offsets, column_result, default_non_const) - && !executeStringToNum>(data, offsets, column_result, default_non_const)) + if (!executeStringToString(data, offsets, column_result, default_non_const, input_rows_count) + && !executeStringToNum>(data, offsets, column_result, default_non_const, input_rows_count) + && !executeStringToNum>(data, offsets, column_result, default_non_const, input_rows_count) + && !executeStringToNum>(data, offsets, column_result, default_non_const, input_rows_count) + && !executeStringToNum>(data, offsets, column_result, default_non_const, input_rows_count) + && !executeStringToNum>(data, offsets, column_result, default_non_const, input_rows_count) + && !executeStringToNum>(data, offsets, column_result, default_non_const, input_rows_count) + && 
!executeStringToNum>(data, offsets, column_result, default_non_const, input_rows_count) + && !executeStringToNum>(data, offsets, column_result, default_non_const, input_rows_count) + && !executeStringToNum>(data, offsets, column_result, default_non_const, input_rows_count) + && !executeStringToNum>(data, offsets, column_result, default_non_const, input_rows_count) + && !executeStringToNum>(data, offsets, column_result, default_non_const, input_rows_count) + && !executeStringToNum>(data, offsets, column_result, default_non_const, input_rows_count)) { const size_t size = offsets.size(); const auto & table = *cache.table_string_to_idx; @@ -497,14 +492,14 @@ namespace const ColumnString::Chars & data, const ColumnString::Offsets & offsets, IColumn & column_result, - const ColumnPtr default_non_const) const + const ColumnPtr default_non_const, + size_t input_rows_count) const { auto * out = typeid_cast(&column_result); if (!out) return false; auto & out_offs = out->getOffsets(); - const size_t size = offsets.size(); - out_offs.resize(size); + out_offs.resize(input_rows_count); auto & out_chars = out->getChars(); const auto * to_col = assert_cast(cache.to_column.get()); @@ -519,18 +514,18 @@ namespace const auto & def_offs = def->getOffsets(); const auto * def_data = def_chars.data(); auto def_size = def_offs[0]; - executeStringToStringHelper(table, data, offsets, out_chars, out_offs, to_chars, to_offs, def_data, def_size, size); + executeStringToStringHelper(table, data, offsets, out_chars, out_offs, to_chars, to_offs, def_data, def_size, input_rows_count); } else if (default_non_const) { const auto * def = assert_cast(default_non_const.get()); const auto & def_chars = def->getChars(); const auto & def_offs = def->getOffsets(); - executeStringToStringHelper(table, data, offsets, out_chars, out_offs, to_chars, to_offs, def_chars, def_offs, size); + executeStringToStringHelper(table, data, offsets, out_chars, out_offs, to_chars, to_offs, def_chars, def_offs, input_rows_count); } else { - executeStringToStringHelper(table, data, offsets, out_chars, out_offs, to_chars, to_offs, data, offsets, size); + executeStringToStringHelper(table, data, offsets, out_chars, out_offs, to_chars, to_offs, data, offsets, input_rows_count); } return true; } @@ -546,11 +541,11 @@ namespace const ColumnString::Offsets & to_offsets, const DefData & def_data, const DefOffs & def_offsets, - const size_t size) const + size_t input_rows_count) const { ColumnString::Offset current_offset = 0; size_t out_cur_off = 0; - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { const char8_t * to = nullptr; size_t to_size = 0; @@ -587,26 +582,26 @@ namespace const ColumnString::Chars & data, const ColumnString::Offsets & offsets, IColumn & column_result, - const ColumnPtr default_non_const) const + const ColumnPtr default_non_const, + size_t input_rows_count) const { auto * out = typeid_cast(&column_result); if (!out) return false; auto & out_pod = out->getData(); - const size_t size = offsets.size(); - out_pod.resize(size); + out_pod.resize(input_rows_count); const auto & to_pod = assert_cast(cache.to_column.get())->getData(); const auto & table = *cache.table_string_to_idx; if (cache.default_column) { const auto const_def = assert_cast(cache.default_column.get())->getData()[0]; - executeStringToNumHelper(table, data, offsets, out_pod, to_pod, const_def, size); + executeStringToNumHelper(table, data, offsets, out_pod, to_pod, const_def, input_rows_count); } else { const auto & nconst_def = 
assert_cast(default_non_const.get())->getData(); - executeStringToNumHelper(table, data, offsets, out_pod, to_pod, nconst_def, size); + executeStringToNumHelper(table, data, offsets, out_pod, to_pod, nconst_def, input_rows_count); } return true; } @@ -619,10 +614,10 @@ namespace PaddedPODArray & out_pod, const PaddedPODArray & to_pod, const Def & def, - const size_t size) const + size_t input_rows_count) const { ColumnString::Offset current_offset = 0; - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { const StringRef ref{&data[current_offset], offsets[i] - current_offset - 1}; current_offset = offsets[i]; diff --git a/src/Functions/translate.cpp b/src/Functions/translate.cpp index 2df08a5664e..366640d7d20 100644 --- a/src/Functions/translate.cpp +++ b/src/Functions/translate.cpp @@ -52,7 +52,8 @@ struct TranslateImpl const std::string & map_from, const std::string & map_to, ColumnString::Chars & res_data, - ColumnString::Offsets & res_offsets) + ColumnString::Offsets & res_offsets, + size_t input_rows_count) { Map map; fillMapWithValues(map, map_from, map_to); @@ -62,7 +63,7 @@ struct TranslateImpl UInt8 * dst = res_data.data(); - for (UInt64 i = 0; i < offsets.size(); ++i) + for (UInt64 i = 0; i < input_rows_count; ++i) { const UInt8 * src = data.data() + offsets[i - 1]; const UInt8 * src_end = data.data() + offsets[i] - 1; @@ -175,19 +176,20 @@ struct TranslateUTF8Impl const std::string & map_from, const std::string & map_to, ColumnString::Chars & res_data, - ColumnString::Offsets & res_offsets) + ColumnString::Offsets & res_offsets, + size_t input_rows_count) { MapASCII map_ascii; MapUTF8 map; fillMapWithValues(map_ascii, map, map_from, map_to); res_data.resize(data.size()); - res_offsets.resize(offsets.size()); + res_offsets.resize(input_rows_count); UInt8 * dst = res_data.data(); UInt64 data_size = 0; - for (UInt64 i = 0; i < offsets.size(); ++i) + for (UInt64 i = 0; i < input_rows_count; ++i) { const UInt8 * src = data.data() + offsets[i - 1]; const UInt8 * src_end = data.data() + offsets[i] - 1; @@ -311,7 +313,7 @@ public: } } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { const ColumnPtr column_src = arguments[0].column; const ColumnPtr column_map_from = arguments[1].column; @@ -330,7 +332,7 @@ public: if (const ColumnString * col = checkAndGetColumn(column_src.get())) { auto col_res = ColumnString::create(); - Impl::vector(col->getChars(), col->getOffsets(), map_from, map_to, col_res->getChars(), col_res->getOffsets()); + Impl::vector(col->getChars(), col->getOffsets(), map_from, map_to, col_res->getChars(), col_res->getOffsets(), input_rows_count); return col_res; } else if (const ColumnFixedString * col_fixed = checkAndGetColumn(column_src.get())) diff --git a/src/Functions/tupleToNameValuePairs.cpp b/src/Functions/tupleToNameValuePairs.cpp index 998e0da4f0c..92734d3d1fc 100644 --- a/src/Functions/tupleToNameValuePairs.cpp +++ b/src/Functions/tupleToNameValuePairs.cpp @@ -99,16 +99,16 @@ public: return std::make_shared(item_data_type); } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { const IColumn * tuple_col = 
arguments[0].column.get(); const DataTypeTuple * tuple = checkAndGetDataType(arguments[0].type.get()); - const auto * tuple_col_concrete = assert_cast(tuple_col); + const auto * tuple_col_concrete = assert_cast(tuple_col); auto keys = ColumnString::create(); MutableColumnPtr values = tuple_col_concrete->getColumn(0).cloneEmpty(); auto offsets = ColumnVector::create(); - for (size_t row = 0; row < tuple_col_concrete->size(); ++row) + for (size_t row = 0; row < input_rows_count; ++row) { for (size_t col = 0; col < tuple_col_concrete->tupleSize(); ++col) { From dc2c3fb1ca4653ee006c8cbbbfa32688f19f1992 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Thu, 25 Jul 2024 11:57:14 +0200 Subject: [PATCH 177/661] Revert "Merge pull request #66563 from ClickHouse/delete-bad-test" This reverts commit d0753c8bb60dacfbd99687906fe4efb7665b20fa, reversing changes made to b8202e19baf7ad171e232a431c8a4c3f1c86e63e. Signed-off-by: Azat Khuzhin --- .../0_stateless/02805_distributed_queries_timeouts.reference | 0 .../queries/0_stateless/02805_distributed_queries_timeouts.sql | 3 +++ 2 files changed, 3 insertions(+) create mode 100644 tests/queries/0_stateless/02805_distributed_queries_timeouts.reference create mode 100644 tests/queries/0_stateless/02805_distributed_queries_timeouts.sql diff --git a/tests/queries/0_stateless/02805_distributed_queries_timeouts.reference b/tests/queries/0_stateless/02805_distributed_queries_timeouts.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02805_distributed_queries_timeouts.sql b/tests/queries/0_stateless/02805_distributed_queries_timeouts.sql new file mode 100644 index 00000000000..0b7337d1255 --- /dev/null +++ b/tests/queries/0_stateless/02805_distributed_queries_timeouts.sql @@ -0,0 +1,3 @@ +select * from remote('127.2', view(select sleep(3) from system.one)) settings receive_timeout=1, async_socket_for_remote=0, use_hedged_requests=1 format Null; +select * from remote('127.2', view(select sleep(3) from system.one)) settings receive_timeout=1, async_socket_for_remote=1, use_hedged_requests=0 format Null; +select * from remote('127.2', view(select sleep(3) from system.one)) settings receive_timeout=1, async_socket_for_remote=0, use_hedged_requests=0 format Null; From 1a4730f1f390e468dab2849bd1b2770e0fb2cbe6 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Thu, 25 Jul 2024 12:03:50 +0200 Subject: [PATCH 178/661] Use Distributed table to avoid extra DESC queries Signed-off-by: Azat Khuzhin --- .../0_stateless/02805_distributed_queries_timeouts.sql | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/queries/0_stateless/02805_distributed_queries_timeouts.sql b/tests/queries/0_stateless/02805_distributed_queries_timeouts.sql index 0b7337d1255..bfa39cd78ee 100644 --- a/tests/queries/0_stateless/02805_distributed_queries_timeouts.sql +++ b/tests/queries/0_stateless/02805_distributed_queries_timeouts.sql @@ -1,3 +1,4 @@ -select * from remote('127.2', view(select sleep(3) from system.one)) settings receive_timeout=1, async_socket_for_remote=0, use_hedged_requests=1 format Null; -select * from remote('127.2', view(select sleep(3) from system.one)) settings receive_timeout=1, async_socket_for_remote=1, use_hedged_requests=0 format Null; -select * from remote('127.2', view(select sleep(3) from system.one)) settings receive_timeout=1, async_socket_for_remote=0, use_hedged_requests=0 format Null; +create table dist as system.one engine=Distributed(test_shard_localhost, system, one); +select sleep(3) from 
dist settings prefer_localhost_replica=0, receive_timeout=1, async_socket_for_remote=0, use_hedged_requests=1 format Null; +select sleep(3) from dist settings prefer_localhost_replica=0, receive_timeout=1, async_socket_for_remote=1, use_hedged_requests=0 format Null; +select sleep(3) from dist settings prefer_localhost_replica=0, receive_timeout=1, async_socket_for_remote=0, use_hedged_requests=0 format Null; From 49732f2966cd793e32234068cf0b87cea9e3eed6 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Thu, 25 Jul 2024 12:06:10 +0200 Subject: [PATCH 179/661] Tune sleep duration/receive_timeout in 02805_distributed_queries_timeouts Signed-off-by: Azat Khuzhin --- .../0_stateless/02805_distributed_queries_timeouts.sql | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/queries/0_stateless/02805_distributed_queries_timeouts.sql b/tests/queries/0_stateless/02805_distributed_queries_timeouts.sql index bfa39cd78ee..f6bccc99977 100644 --- a/tests/queries/0_stateless/02805_distributed_queries_timeouts.sql +++ b/tests/queries/0_stateless/02805_distributed_queries_timeouts.sql @@ -1,4 +1,4 @@ create table dist as system.one engine=Distributed(test_shard_localhost, system, one); -select sleep(3) from dist settings prefer_localhost_replica=0, receive_timeout=1, async_socket_for_remote=0, use_hedged_requests=1 format Null; -select sleep(3) from dist settings prefer_localhost_replica=0, receive_timeout=1, async_socket_for_remote=1, use_hedged_requests=0 format Null; -select sleep(3) from dist settings prefer_localhost_replica=0, receive_timeout=1, async_socket_for_remote=0, use_hedged_requests=0 format Null; +select sleep(8) from dist settings function_sleep_max_microseconds_per_block=8e9, prefer_localhost_replica=0, receive_timeout=7, async_socket_for_remote=0, use_hedged_requests=1 format Null; +select sleep(8) from dist settings function_sleep_max_microseconds_per_block=8e9, prefer_localhost_replica=0, receive_timeout=7, async_socket_for_remote=1, use_hedged_requests=0 format Null; +select sleep(8) from dist settings function_sleep_max_microseconds_per_block=8e9, prefer_localhost_replica=0, receive_timeout=7, async_socket_for_remote=0, use_hedged_requests=0 format Null; From 86e23b346fbce791794e38ad7ae77d8af964988a Mon Sep 17 00:00:00 2001 From: Max K Date: Thu, 25 Jul 2024 12:12:37 +0200 Subject: [PATCH 180/661] rename test stages --- .github/workflows/master.yml | 21 +++++++++++---------- .github/workflows/pull_request.yml | 20 ++++++++++---------- tests/ci/ci_config.py | 6 +++--- tests/ci/ci_definitions.py | 6 +++--- tests/ci/test_ci_config.py | 4 ++-- 5 files changed, 29 insertions(+), 28 deletions(-) diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index acd7511d520..2ce1124404f 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -93,21 +93,21 @@ jobs: with: stage: Builds_2 data: ${{ needs.RunConfig.outputs.data }} - Tests_2: + Tests_2_ww: needs: [RunConfig, Builds_2] + if: ${{ !failure() && !cancelled() && contains(fromJson(needs.RunConfig.outputs.data).stages_data.stages_to_do, 'Tests_2_ww') }} + uses: ./.github/workflows/reusable_test_stage.yml + with: + stage: Tests_2_ww + data: ${{ needs.RunConfig.outputs.data }} + Tests_2: + # Test_3 should not wait for Test_1/Test_2 and should not be blocked by them on master branch since all jobs need to run there. 
+ needs: [RunConfig, Builds_1] if: ${{ !failure() && !cancelled() && contains(fromJson(needs.RunConfig.outputs.data).stages_data.stages_to_do, 'Tests_2') }} uses: ./.github/workflows/reusable_test_stage.yml with: stage: Tests_2 data: ${{ needs.RunConfig.outputs.data }} - Tests_3: - # Test_3 should not wait for Test_1/Test_2 and should not be blocked by them on master branch since all jobs need to run there. - needs: [RunConfig, Builds_1] - if: ${{ !failure() && !cancelled() && contains(fromJson(needs.RunConfig.outputs.data).stages_data.stages_to_do, 'Tests_3') }} - uses: ./.github/workflows/reusable_test_stage.yml - with: - stage: Tests_3 - data: ${{ needs.RunConfig.outputs.data }} ################################# Reports ################################# # Reports should run even if Builds_1/2 fail - run them separately, not in Tests_1/2/3 @@ -123,7 +123,7 @@ jobs: FinishCheck: if: ${{ !cancelled() }} - needs: [RunConfig, Builds_1, Builds_2, Builds_Report, Tests_1, Tests_2, Tests_3] + needs: [RunConfig, Builds_1, Builds_2, Builds_Report, Tests_1, Tests_2_ww, Tests_2] runs-on: [self-hosted, style-checker-aarch64] steps: - name: Check out repository code @@ -133,6 +133,7 @@ jobs: cd "$GITHUB_WORKSPACE/tests/ci" python3 finish_check.py --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }} - name: Check Workflow results + if: ${{ !cancelled() }} run: | export WORKFLOW_RESULT_FILE="/tmp/workflow_results.json" cat > "$WORKFLOW_RESULT_FILE" << 'EOF' diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index 34bf51871d2..854dff530e7 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -123,20 +123,20 @@ jobs: stage: Builds_2 data: ${{ needs.RunConfig.outputs.data }} # stage for running non-required checks without being blocked by required checks (Test_1) if corresponding settings is selected - Tests_2: + Tests_2_ww: needs: [RunConfig, Builds_1] + if: ${{ !failure() && !cancelled() && contains(fromJson(needs.RunConfig.outputs.data).stages_data.stages_to_do, 'Tests_2_ww') }} + uses: ./.github/workflows/reusable_test_stage.yml + with: + stage: Tests_2_ww + data: ${{ needs.RunConfig.outputs.data }} + Tests_2: + needs: [RunConfig, Builds_1, Tests_1] if: ${{ !failure() && !cancelled() && contains(fromJson(needs.RunConfig.outputs.data).stages_data.stages_to_do, 'Tests_2') }} uses: ./.github/workflows/reusable_test_stage.yml with: stage: Tests_2 data: ${{ needs.RunConfig.outputs.data }} - Tests_3: - needs: [RunConfig, Builds_1, Tests_1] - if: ${{ !failure() && !cancelled() && contains(fromJson(needs.RunConfig.outputs.data).stages_data.stages_to_do, 'Tests_3') }} - uses: ./.github/workflows/reusable_test_stage.yml - with: - stage: Tests_3 - data: ${{ needs.RunConfig.outputs.data }} ################################# Reports ################################# # Reports should run even if Builds_1/2 fail - run them separately (not in Tests_1/2/3) @@ -154,7 +154,7 @@ jobs: if: ${{ !cancelled() }} # Test_2 or Test_3 do not have the jobs required for Mergeable check, # however, set them as "needs" to get all checks results before the automatic merge occurs. 
- needs: [RunConfig, BuildDockers, StyleCheck, FastTest, Builds_1, Builds_2, Builds_Report, Tests_1, Tests_2, Tests_3] + needs: [RunConfig, BuildDockers, StyleCheck, FastTest, Builds_1, Builds_2, Builds_Report, Tests_1, Tests_2_ww, Tests_2] runs-on: [self-hosted, style-checker-aarch64] steps: - name: Check out repository code @@ -178,7 +178,7 @@ jobs: # FinishCheck: if: ${{ !failure() && !cancelled() }} - needs: [RunConfig, BuildDockers, StyleCheck, FastTest, Builds_1, Builds_2, Builds_Report, Tests_1, Tests_2, Tests_3] + needs: [RunConfig, BuildDockers, StyleCheck, FastTest, Builds_1, Builds_2, Builds_Report, Tests_1, Tests_2_ww, Tests_2] runs-on: [self-hosted, style-checker-aarch64] steps: - name: Check out repository code diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index a9bdb639835..0e295b2339d 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -587,10 +587,10 @@ class CI: if job_name in REQUIRED_CHECKS: stage_type = WorkflowStages.TESTS_1 else: - stage_type = WorkflowStages.TESTS_3 + stage_type = WorkflowStages.TESTS_2 assert stage_type, f"BUG [{job_name}]" - if non_blocking_ci and stage_type == WorkflowStages.TESTS_3: - stage_type = WorkflowStages.TESTS_2 + if non_blocking_ci and stage_type == WorkflowStages.TESTS_2: + stage_type = WorkflowStages.TESTS_2_WW return stage_type @classmethod diff --git a/tests/ci/ci_definitions.py b/tests/ci/ci_definitions.py index 149177ecba5..054b554b8fa 100644 --- a/tests/ci/ci_definitions.py +++ b/tests/ci/ci_definitions.py @@ -67,10 +67,10 @@ class WorkflowStages(metaclass=WithIter): BUILDS_2 = "Builds_2" # all tests required for merge TESTS_1 = "Tests_1" - # not used atm - TESTS_2 = "Tests_2" + # used in woolenwolfdog mode + TESTS_2_WW = "Tests_2_ww" # all tests not required for merge - TESTS_3 = "Tests_3" + TESTS_2 = "Tests_2" class Runners(metaclass=WithIter): diff --git a/tests/ci/test_ci_config.py b/tests/ci/test_ci_config.py index 4a2bd606d0e..be540413b3c 100644 --- a/tests/ci/test_ci_config.py +++ b/tests/ci/test_ci_config.py @@ -211,7 +211,7 @@ class TestCIConfig(unittest.TestCase): else: self.assertTrue( CI.get_job_ci_stage(job) - in (CI.WorkflowStages.TESTS_1, CI.WorkflowStages.TESTS_3), + in (CI.WorkflowStages.TESTS_1, CI.WorkflowStages.TESTS_2), msg=f"Stage for [{job}] is not correct", ) @@ -242,7 +242,7 @@ class TestCIConfig(unittest.TestCase): else: self.assertTrue( CI.get_job_ci_stage(job, non_blocking_ci=True) - in (CI.WorkflowStages.TESTS_1, CI.WorkflowStages.TESTS_2), + in (CI.WorkflowStages.TESTS_1, CI.WorkflowStages.TESTS_2_WW), msg=f"Stage for [{job}] is not correct", ) From a32c702caa142d15bc3e5bc51ca90240d5d010a9 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Thu, 25 Jul 2024 18:23:47 +0800 Subject: [PATCH 181/661] fix style --- src/Functions/map.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/Functions/map.cpp b/src/Functions/map.cpp index 5319390fb70..a8e5f7ad90e 100644 --- a/src/Functions/map.cpp +++ b/src/Functions/map.cpp @@ -236,7 +236,6 @@ public: ColumnPtr data_keys = col_keys->getDataPtr(); if (isColumnNullableOrLowCardinalityNullable(*data_keys)) { - std::cout << "data keys is nullable" << std::endl; const NullMap * null_map = nullptr; if (const auto * nullable = checkAndGetColumn(data_keys.get())) { @@ -263,9 +262,7 @@ public: const auto & data_values = col_values->getDataPtr(); const auto & offsets = col_keys->getOffsetsPtr(); - std::cout << "before create array:" << "offsets:" << offsets->getName() << std::endl; auto nested_column = 
ColumnArray::create(ColumnTuple::create(Columns{std::move(data_keys), data_values}), offsets); - std::cout << "after create array:" << "offsets:" << offsets->getName() << std::endl; return ColumnMap::create(nested_column); } }; From 7fedc0ffbee9d04e0352037021a127cea93cbbfa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Thu, 25 Jul 2024 12:26:37 +0200 Subject: [PATCH 182/661] Update base/glibc-compatibility/musl/getauxval.c Co-authored-by: Alexander Gololobov --- base/glibc-compatibility/musl/getauxval.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/base/glibc-compatibility/musl/getauxval.c b/base/glibc-compatibility/musl/getauxval.c index b5bd2f114c2..28cb0f8d005 100644 --- a/base/glibc-compatibility/musl/getauxval.c +++ b/base/glibc-compatibility/musl/getauxval.c @@ -99,12 +99,12 @@ static unsigned long NO_SANITIZE_THREAD __auxv_init_procfs(unsigned long type) /// #8 0x5622c0d6b7cd in _start (./ClickHouse/build_msan/contrib/google-protobuf-cmake/protoc+0x22c7cd) (BuildId: 6411d3c88b898ba3f7d49760555977d3e61f0741) /// The source of the issue above is that, at this point in time during __msan_init, we can't really do much as - /// most global variables aren't initialized or available yet, so we we can't initiate the auxiliary vector. + /// most global variables aren't initialized or available yet, so we can't initiate the auxiliary vector. /// Normal glibc / musl getauxval doesn't have this problem since they initiate their auxval vector at the very /// start of __libc_start_main (just keeping track of argv+argc+1), but we don't have such option (otherwise // this complexity of reading "/proc/self/auxv" or using __environ would not be necessary). - /// To avoid this crashes on the re-exec call (see above how it would fail when creating `aux`, and it we used + /// To avoid this crashes on the re-exec call (see above how it would fail when creating `aux`, and if we used /// __auxv_init_environ then it would SIGSEV on READing `__environ`) we capture this call for `AT_EXECFN` and /// unconditionally return "/proc/self/exe" without any preparation. Theoretically this should be fine in /// our case, as we don't load any libraries. That's the theory at least. 
From beb506a5b8179f5c88a6f5fc90d62b8e74bf0d35 Mon Sep 17 00:00:00 2001 From: maxvostrikov Date: Thu, 25 Jul 2024 12:37:05 +0200 Subject: [PATCH 183/661] added somme tests in relation with https://github.com/ClickHouse/ClickHouse/pull/54881 with new behaviour when enable_named_columns_in_function_tuple=1 (default value) --- .../0_stateless/00307_format_xml.reference | 41 ++++++++++++++++++ .../queries/0_stateless/00307_format_xml.sql | 3 ++ .../0_stateless/00309_formats.reference | Bin 18537 -> 18736 bytes tests/queries/0_stateless/00309_formats.sql | 5 +++ 4 files changed, 49 insertions(+) diff --git a/tests/queries/0_stateless/00307_format_xml.reference b/tests/queries/0_stateless/00307_format_xml.reference index 2d9badc5a3e..14e74653d4f 100644 --- a/tests/queries/0_stateless/00307_format_xml.reference +++ b/tests/queries/0_stateless/00307_format_xml.reference @@ -1,3 +1,4 @@ +unnamed columns in tuple @@ -54,3 +55,43 @@ 1 +named columns in tuple + + + + + + s + String + + + time + DateTime + + + tpl + Tuple(String, DateTime) + + + + + + Hello & world + + Hello & world2001-02-03 04:05:06 + + + + + Hello & world + + Hello & world2001-02-03 04:05:06 + + + Hello & world + + Hello & world2001-02-03 04:05:06 + + + 1 + diff --git a/tests/queries/0_stateless/00307_format_xml.sql b/tests/queries/0_stateless/00307_format_xml.sql index 29c733bb186..22566112bc7 100644 --- a/tests/queries/0_stateless/00307_format_xml.sql +++ b/tests/queries/0_stateless/00307_format_xml.sql @@ -1,2 +1,5 @@ SET output_format_write_statistics = 0; +SELECT 'unnamed columns in tuple'; SELECT 'Hello & world' AS s, 'Hello\n', toDateTime('2001-02-03 04:05:06') AS time, arrayMap(x -> toString(x), range(10)) AS arr, (s, time) AS tpl SETTINGS extremes = 1, enable_named_columns_in_function_tuple = 0 FORMAT XML; +SELECT 'named columns in tuple'; +SELECT 'Hello & world' AS s, toDateTime('2001-02-03 04:05:06') AS time, (s, time) AS tpl SETTINGS extremes = 1, enable_named_columns_in_function_tuple = 0 FORMAT XML;` diff --git a/tests/queries/0_stateless/00309_formats.reference b/tests/queries/0_stateless/00309_formats.reference index e637ee0363a7b35152a155ae3fa73a4f451d5148..a63720618ba54c6cc456f3356512449322dc2e80 100644 GIT binary patch delta 149 zcmaDkfpNnm#toSsmduQKj47-or3E>uY@wccC1xfpE{P?n;vparjXVX2n2th<0!Ubs ziva?zYfU!f5;0_8gmBMl=W(WRf=%Iq=->n?=fp5&@&sYg$;X8ig*A*!&2-d_3=9l( M4J>sHjny@|06s7$QUCw| delta 9 QcmdlmiSgwG#toSs02V|99{>OV diff --git a/tests/queries/0_stateless/00309_formats.sql b/tests/queries/0_stateless/00309_formats.sql index b0939c00a10..691fc6e7ab6 100644 --- a/tests/queries/0_stateless/00309_formats.sql +++ b/tests/queries/0_stateless/00309_formats.sql @@ -9,3 +9,8 @@ SELECT number * 246 + 10 AS n, toDate('2000-01-01') + n AS d, range(n) AS arr, a SELECT number * 246 + 10 AS n, toDate('2000-01-01') + n AS d, range(n) AS arr, arrayStringConcat(arrayMap(x -> reinterpretAsString(x), arr)) AS s, (n, d) AS tuple FROM system.numbers LIMIT 2 FORMAT JSON; SELECT number * 246 + 10 AS n, toDate('2000-01-01') + n AS d, range(n) AS arr, arrayStringConcat(arrayMap(x -> reinterpretAsString(x), arr)) AS s, (n, d) AS tuple FROM system.numbers LIMIT 2 FORMAT JSONCompact; SELECT number * 246 + 10 AS n, toDate('2000-01-01') + n AS d, range(n) AS arr, arrayStringConcat(arrayMap(x -> reinterpretAsString(x), arr)) AS s, (n, d) AS tuple FROM system.numbers LIMIT 2 FORMAT XML; + +SET enable_named_columns_in_function_tuple = 1; + +SELECT number * 246 + 10 AS n, toDate('2000-01-01') + n AS d, (n, d) AS tuple FROM system.numbers LIMIT 2 
FORMAT RowBinaryWithNamesAndTypes; +SELECT number * 246 + 10 AS n, toDate('2000-01-01') + n AS d, (n, d) AS tuple FROM system.numbers LIMIT 2 FORMAT TabSeparatedWithNamesAndTypes; From 0dc67aae97d4b964cb0f9c389cbf3ce91cb76fb7 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Thu, 25 Jul 2024 13:52:30 +0200 Subject: [PATCH 184/661] fix MIN_ITERATIONS 2 --- .../0_stateless/01171_mv_select_insert_isolation_long.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01171_mv_select_insert_isolation_long.sh b/tests/queries/0_stateless/01171_mv_select_insert_isolation_long.sh index d79ab27d8b2..620281ee972 100755 --- a/tests/queries/0_stateless/01171_mv_select_insert_isolation_long.sh +++ b/tests/queries/0_stateless/01171_mv_select_insert_isolation_long.sh @@ -168,7 +168,7 @@ fi START_TIME=$(get_now) STOP_TIME=$((START_TIME + MAIN_TIME_PART)) SECOND_STOP_TIME=$((STOP_TIME + SECOND_TIME_PART)) -MIN_ITERATIONS=30 +MIN_ITERATIONS=25 run_until_deadline_and_at_least_times $STOP_TIME $MIN_ITERATIONS insert_commit_action 1 & PID_1=$! run_until_deadline_and_at_least_times $STOP_TIME $MIN_ITERATIONS insert_commit_action 2 & PID_2=$! From 2988e13050f0ab8a06e36ec8fe745386a214141b Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Thu, 25 Jul 2024 13:55:01 +0200 Subject: [PATCH 185/661] Free bitmask --- base/base/getMemoryAmount.cpp | 1 + programs/server/Server.cpp | 2 ++ 2 files changed, 3 insertions(+) diff --git a/base/base/getMemoryAmount.cpp b/base/base/getMemoryAmount.cpp index b8162146496..56cddbfd628 100644 --- a/base/base/getMemoryAmount.cpp +++ b/base/base/getMemoryAmount.cpp @@ -84,6 +84,7 @@ uint64_t getMemoryAmountOrZero() memory_amount = total_numa_memory; } + numa_bitmask_free(membind); } #endif diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 619a72ff200..b9a7c298f00 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -778,6 +778,8 @@ try "ClickHouse is bound to a subset of NUMA nodes. 
Total memory of all available nodes {}", ReadableSize(total_numa_memory)); } + + numa_bitmask_free(membind); } #endif From b80305ba981ca1f862084d3316144efcba17466b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Thu, 25 Jul 2024 14:31:00 +0200 Subject: [PATCH 186/661] Improve backport script --- tests/ci/cherry_pick.py | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/tests/ci/cherry_pick.py b/tests/ci/cherry_pick.py index a7fc6d02853..623a816148e 100644 --- a/tests/ci/cherry_pick.py +++ b/tests/ci/cherry_pick.py @@ -420,7 +420,8 @@ class Backport: fetch_release_prs = self.gh.get_release_pulls(self._fetch_from) fetch_release_branches = [pr.head.ref for pr in fetch_release_prs] self.labels_to_backport = [ - f"v{branch}-must-backport" for branch in fetch_release_branches + f"v{branch if self._repo_name == "ClickHouse/ClickHouse" else branch.replace('release/','')}-must-backport" + for branch in fetch_release_branches ] logging.info("Fetching from %s", self._fetch_from) @@ -490,17 +491,23 @@ class Backport: def process_pr(self, pr: PullRequest) -> None: pr_labels = [label.name for label in pr.labels] - if ( - any(label in pr_labels for label in self.must_create_backport_labels) - or self._repo_name != self._fetch_from - ): + if any(label in pr_labels for label in self.must_create_backport_labels): branches = [ ReleaseBranch(br, pr, self.repo, self.backport_created_label) for br in self.release_branches ] # type: List[ReleaseBranch] else: branches = [ - ReleaseBranch(br, pr, self.repo, self.backport_created_label) + ReleaseBranch( + ( + br + if self._repo_name == "ClickHouse/clickhouse" + else f"release/{br}" + ), + pr, + self.repo, + self.backport_created_label, + ) for br in [ label.split("-", 1)[0][1:] # v21.8-must-backport for label in pr_labels From 2c83a39503255f0b2233b511a63a262cb8749a53 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Thu, 25 Jul 2024 12:53:16 +0000 Subject: [PATCH 187/661] Fixes --- tests/ci/cherry_pick.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/ci/cherry_pick.py b/tests/ci/cherry_pick.py index 623a816148e..c2f567e5f15 100644 --- a/tests/ci/cherry_pick.py +++ b/tests/ci/cherry_pick.py @@ -420,7 +420,9 @@ class Backport: fetch_release_prs = self.gh.get_release_pulls(self._fetch_from) fetch_release_branches = [pr.head.ref for pr in fetch_release_prs] self.labels_to_backport = [ - f"v{branch if self._repo_name == "ClickHouse/ClickHouse" else branch.replace('release/','')}-must-backport" + f"v{branch}-must-backport" + if self._repo_name == "ClickHouse/ClickHouse" + else f"v{branch.replace('release/','')}-must-backport" for branch in fetch_release_branches ] @@ -501,7 +503,7 @@ class Backport: ReleaseBranch( ( br - if self._repo_name == "ClickHouse/clickhouse" + if self._repo_name == "ClickHouse/Clickhouse" else f"release/{br}" ), pr, From 92cca8e65dec9f46d5a248c10e748088c9437cb6 Mon Sep 17 00:00:00 2001 From: Konstantin Bogdanov Date: Thu, 25 Jul 2024 14:51:53 +0200 Subject: [PATCH 188/661] Fix --- src/AggregateFunctions/SingleValueData.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/AggregateFunctions/SingleValueData.cpp b/src/AggregateFunctions/SingleValueData.cpp index a14caf00f73..996e64b22e0 100644 --- a/src/AggregateFunctions/SingleValueData.cpp +++ b/src/AggregateFunctions/SingleValueData.cpp @@ -1191,7 +1191,7 @@ bool SingleValueDataString::isEqualTo(const DB::IColumn & column, size_t row_num bool 
SingleValueDataString::isEqualTo(const SingleValueDataBase & other) const { auto const & to = assert_cast(other); - return has() && to.getStringRef() == getStringRef(); + return has() && to.has() && to.getStringRef() == getStringRef(); } void SingleValueDataString::set(const IColumn & column, size_t row_num, Arena * arena) From b23ce171c3620568829201d80789f314fc27499a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Thu, 25 Jul 2024 13:11:32 +0000 Subject: [PATCH 189/661] My black version said this was ok --- tests/ci/cherry_pick.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tests/ci/cherry_pick.py b/tests/ci/cherry_pick.py index c2f567e5f15..0b2aa9a2d35 100644 --- a/tests/ci/cherry_pick.py +++ b/tests/ci/cherry_pick.py @@ -420,9 +420,11 @@ class Backport: fetch_release_prs = self.gh.get_release_pulls(self._fetch_from) fetch_release_branches = [pr.head.ref for pr in fetch_release_prs] self.labels_to_backport = [ - f"v{branch}-must-backport" - if self._repo_name == "ClickHouse/ClickHouse" - else f"v{branch.replace('release/','')}-must-backport" + ( + f"v{branch}-must-backport" + if self._repo_name == "ClickHouse/ClickHouse" + else f"v{branch.replace('release/','')}-must-backport" + ) for branch in fetch_release_branches ] From c5164fede8665b61c10ec0d7b6873a7cf04aab12 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 25 Jul 2024 13:17:21 +0000 Subject: [PATCH 190/661] Fix some test. --- src/Interpreters/ExpressionAnalyzer.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index 6b5b129085d..5972d89bddd 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -1944,7 +1944,7 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( Block before_prewhere_sample = source_header; if (sanitizeBlock(before_prewhere_sample)) { - prewhere_dag_and_flags->dag.updateHeader(before_prewhere_sample); + before_prewhere_sample = prewhere_dag_and_flags->dag.updateHeader(before_prewhere_sample); auto & column_elem = before_prewhere_sample.getByName(query.prewhere()->getColumnName()); /// If the filter column is a constant, record it. 
if (column_elem.column) @@ -1976,7 +1976,7 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( before_where_sample = source_header; if (sanitizeBlock(before_where_sample)) { - before_where->dag.updateHeader(before_where_sample); + before_where_sample = before_where->dag.updateHeader(before_where_sample); auto & column_elem = before_where_sample.getByName(query.where()->getColumnName()); From ae75c99e3fad02a3716c9d520c3a680f4d9d28e4 Mon Sep 17 00:00:00 2001 From: Konstantin Bogdanov Date: Thu, 25 Jul 2024 15:17:59 +0200 Subject: [PATCH 191/661] Add a test --- .../0_stateless/03210_fix-single-value-data-assertion.reference | 0 .../0_stateless/03210_fix-single-value-data-assertion.sql | 1 + 2 files changed, 1 insertion(+) create mode 100644 tests/queries/0_stateless/03210_fix-single-value-data-assertion.reference create mode 100644 tests/queries/0_stateless/03210_fix-single-value-data-assertion.sql diff --git a/tests/queries/0_stateless/03210_fix-single-value-data-assertion.reference b/tests/queries/0_stateless/03210_fix-single-value-data-assertion.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03210_fix-single-value-data-assertion.sql b/tests/queries/0_stateless/03210_fix-single-value-data-assertion.sql new file mode 100644 index 00000000000..66e62377d6b --- /dev/null +++ b/tests/queries/0_stateless/03210_fix-single-value-data-assertion.sql @@ -0,0 +1 @@ +SELECT intDiv(number, 2) AS k, count(toFixedString(toFixedString('hello', 5), 5)) IGNORE NULLS, sumArgMax(number, toString(number % 20)), argMax(toString(number), number) FROM (SELECT number FROM system.numbers LIMIT 65537) WHERE toLowCardinality(toLowCardinality(toNullable(21))) GROUP BY k WITH TOTALS ORDER BY k ASC NULLS FIRST LIMIT 255 SETTINGS group_by_overflow_mode = 'any', totals_mode = 'before_having', max_rows_to_group_by = 100000 FORMAT Null From ad44fb1ba4759434ecb4353a7878aea6162f8fef Mon Sep 17 00:00:00 2001 From: avogar Date: Thu, 25 Jul 2024 13:22:19 +0000 Subject: [PATCH 192/661] Increase lock_acquire_timeout_for_background_operations setting in dynamic merges tests --- .../03037_dynamic_merges_1_horizontal_compact_merge_tree.sql | 2 +- .../03037_dynamic_merges_1_horizontal_compact_wide_tree.sql | 2 +- .../03037_dynamic_merges_1_vertical_compact_merge_tree.sql | 5 +++-- .../03037_dynamic_merges_1_vertical_wide_merge_tree.sql | 2 +- .../03037_dynamic_merges_2_horizontal_compact_merge_tree.sql | 2 +- .../03037_dynamic_merges_2_horizontal_wide_merge_tree.sql | 2 +- .../03037_dynamic_merges_2_vertical_compact_merge_tree.sql | 2 +- .../03037_dynamic_merges_2_vertical_wide_merge_tree.sql | 2 +- .../03038_nested_dynamic_merges_compact_horizontal.sql | 2 +- .../03038_nested_dynamic_merges_compact_vertical.sql | 2 +- .../03038_nested_dynamic_merges_wide_horizontal.sql | 2 +- .../03038_nested_dynamic_merges_wide_vertical.sql | 2 +- 12 files changed, 14 insertions(+), 13 deletions(-) diff --git a/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_merge_tree.sql b/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_merge_tree.sql index b66fe5e2187..07371ee099b 100644 --- a/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_merge_tree.sql +++ b/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_merge_tree.sql @@ -2,7 +2,7 @@ set allow_experimental_dynamic_type=1; drop table if exists test; -create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, 
min_bytes_for_wide_part=10000000000, vertical_merge_algorithm_min_columns_to_activate=10, index_granularity_bytes=10485760, index_granularity=8192, merge_max_block_size=8192, merge_max_block_size_bytes=10485760; +create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000, vertical_merge_algorithm_min_columns_to_activate=10, index_granularity_bytes=10485760, index_granularity=8192, merge_max_block_size=8192, merge_max_block_size_bytes=10485760, lock_acquire_timeout_for_background_operations=600; system stop merges test; insert into test select number, number from numbers(100000); diff --git a/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_wide_tree.sql b/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_wide_tree.sql index 8a376b6d7d7..2b55a31e937 100644 --- a/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_wide_tree.sql +++ b/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_wide_tree.sql @@ -2,7 +2,7 @@ set allow_experimental_dynamic_type=1; drop table if exists test; -create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1, vertical_merge_algorithm_min_columns_to_activate=10, index_granularity_bytes=10485760, index_granularity=8192, merge_max_block_size=8192, merge_max_block_size_bytes=10485760; +create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1, vertical_merge_algorithm_min_columns_to_activate=10, index_granularity_bytes=10485760, index_granularity=8192, merge_max_block_size=8192, merge_max_block_size_bytes=10485760, lock_acquire_timeout_for_background_operations=600; system stop merges test; insert into test select number, number from numbers(100000); diff --git a/tests/queries/0_stateless/03037_dynamic_merges_1_vertical_compact_merge_tree.sql b/tests/queries/0_stateless/03037_dynamic_merges_1_vertical_compact_merge_tree.sql index 127b56e727c..ea7295a9eab 100644 --- a/tests/queries/0_stateless/03037_dynamic_merges_1_vertical_compact_merge_tree.sql +++ b/tests/queries/0_stateless/03037_dynamic_merges_1_vertical_compact_merge_tree.sql @@ -2,7 +2,7 @@ set allow_experimental_dynamic_type=1; drop table if exists test; -create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1, index_granularity_bytes=10485760, index_granularity=8192, merge_max_block_size=8192, merge_max_block_size_bytes=10485760; +create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1, index_granularity_bytes=10485760, index_granularity=8192, merge_max_block_size=8192, merge_max_block_size_bytes=10485760, lock_acquire_timeout_for_background_operations=600; system stop merges test; insert into test select number, number from numbers(100000); @@ -13,7 +13,8 @@ insert into test select number, toDateTime(number) from numbers(50000); insert into test select number, NULL from numbers(100000); select count(), dynamicType(d) from test group by dynamicType(d) order by 
count(), dynamicType(d); -system start merges test; optimize table test final;; +system start merges test; +optimize table test final; select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); system stop merges test; diff --git a/tests/queries/0_stateless/03037_dynamic_merges_1_vertical_wide_merge_tree.sql b/tests/queries/0_stateless/03037_dynamic_merges_1_vertical_wide_merge_tree.sql index e5c273cb592..e888a14b323 100644 --- a/tests/queries/0_stateless/03037_dynamic_merges_1_vertical_wide_merge_tree.sql +++ b/tests/queries/0_stateless/03037_dynamic_merges_1_vertical_wide_merge_tree.sql @@ -2,7 +2,7 @@ set allow_experimental_dynamic_type=1; drop table if exists test; -create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1, index_granularity_bytes=10485760, index_granularity=8192, merge_max_block_size=8192, merge_max_block_size_bytes=10485760; +create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1, index_granularity_bytes=10485760, index_granularity=8192, merge_max_block_size=8192, merge_max_block_size_bytes=10485760, lock_acquire_timeout_for_background_operations=600; system stop merges test; insert into test select number, number from numbers(100000); diff --git a/tests/queries/0_stateless/03037_dynamic_merges_2_horizontal_compact_merge_tree.sql b/tests/queries/0_stateless/03037_dynamic_merges_2_horizontal_compact_merge_tree.sql index 6d7a0dd8c18..e633b277ebd 100644 --- a/tests/queries/0_stateless/03037_dynamic_merges_2_horizontal_compact_merge_tree.sql +++ b/tests/queries/0_stateless/03037_dynamic_merges_2_horizontal_compact_merge_tree.sql @@ -3,7 +3,7 @@ set allow_experimental_dynamic_type = 1; drop table if exists test; -create table test (id UInt64, d Dynamic) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000; +create table test (id UInt64, d Dynamic) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000, lock_acquire_timeout_for_background_operations=600; system stop merges test; insert into test select number, number from numbers(1000000); insert into test select number, 'str_' || toString(number) from numbers(1000000, 1000000); diff --git a/tests/queries/0_stateless/03037_dynamic_merges_2_horizontal_wide_merge_tree.sql b/tests/queries/0_stateless/03037_dynamic_merges_2_horizontal_wide_merge_tree.sql index 011d54d2360..90dbc2d84f5 100644 --- a/tests/queries/0_stateless/03037_dynamic_merges_2_horizontal_wide_merge_tree.sql +++ b/tests/queries/0_stateless/03037_dynamic_merges_2_horizontal_wide_merge_tree.sql @@ -3,7 +3,7 @@ set allow_experimental_dynamic_type = 1; drop table if exists test; -create table test (id UInt64, d Dynamic) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1; +create table test (id UInt64, d Dynamic) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1, lock_acquire_timeout_for_background_operations=600; system stop merges test; insert into test select number, number from numbers(1000000); insert into test select number, 'str_' || toString(number) from 
numbers(1000000, 1000000); diff --git a/tests/queries/0_stateless/03037_dynamic_merges_2_vertical_compact_merge_tree.sql b/tests/queries/0_stateless/03037_dynamic_merges_2_vertical_compact_merge_tree.sql index 1a74f9e5417..ffd2618ee51 100644 --- a/tests/queries/0_stateless/03037_dynamic_merges_2_vertical_compact_merge_tree.sql +++ b/tests/queries/0_stateless/03037_dynamic_merges_2_vertical_compact_merge_tree.sql @@ -3,7 +3,7 @@ set allow_experimental_dynamic_type = 1; drop table if exists test; -create table test (id UInt64, d Dynamic) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1; +create table test (id UInt64, d Dynamic) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1, lock_acquire_timeout_for_background_operations=600; system stop merges test; insert into test select number, number from numbers(1000000); insert into test select number, 'str_' || toString(number) from numbers(1000000, 1000000); diff --git a/tests/queries/0_stateless/03037_dynamic_merges_2_vertical_wide_merge_tree.sql b/tests/queries/0_stateless/03037_dynamic_merges_2_vertical_wide_merge_tree.sql index cbc834e9660..36dff88751b 100644 --- a/tests/queries/0_stateless/03037_dynamic_merges_2_vertical_wide_merge_tree.sql +++ b/tests/queries/0_stateless/03037_dynamic_merges_2_vertical_wide_merge_tree.sql @@ -3,7 +3,7 @@ set allow_experimental_dynamic_type = 1; drop table if exists test; -create table test (id UInt64, d Dynamic) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1; +create table test (id UInt64, d Dynamic) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1, lock_acquire_timeout_for_background_operations=600; system stop merges test; insert into test select number, number from numbers(1000000); insert into test select number, 'str_' || toString(number) from numbers(1000000, 1000000); diff --git a/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_horizontal.sql b/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_horizontal.sql index ff1dc5e7ded..1d5c63dcdf1 100644 --- a/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_horizontal.sql +++ b/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_horizontal.sql @@ -6,7 +6,7 @@ set allow_experimental_dynamic_type = 1; set enable_named_columns_in_function_tuple = 0; drop table if exists test;; -create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000;; +create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000, lock_acquire_timeout_for_background_operations=600; system stop merges test; insert into test select number, number from numbers(100000); diff --git a/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_vertical.sql b/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_vertical.sql index 
f9b0101cb87..2bffe35c577 100644 --- a/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_vertical.sql +++ b/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_vertical.sql @@ -6,7 +6,7 @@ set allow_experimental_dynamic_type = 1; set enable_named_columns_in_function_tuple = 0; drop table if exists test;; -create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1; +create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1, lock_acquire_timeout_for_background_operations=600; system stop merges test; insert into test select number, number from numbers(100000); diff --git a/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_horizontal.sql b/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_horizontal.sql index 5f373d41c7d..fb686091ebb 100644 --- a/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_horizontal.sql +++ b/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_horizontal.sql @@ -6,7 +6,7 @@ set allow_experimental_dynamic_type = 1; set enable_named_columns_in_function_tuple = 0; drop table if exists test;; -create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1; +create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1, lock_acquire_timeout_for_background_operations=600; system stop merges test; insert into test select number, number from numbers(100000); diff --git a/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_vertical.sql b/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_vertical.sql index 36bbc76b8cb..ed195452d56 100644 --- a/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_vertical.sql +++ b/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_vertical.sql @@ -6,7 +6,7 @@ set allow_experimental_dynamic_type = 1; set enable_named_columns_in_function_tuple = 0; drop table if exists test;; -create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1; +create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1, lock_acquire_timeout_for_background_operations=600; system stop merges test; insert into test select number, number from numbers(100000); From e0b125368855e57733132046de5cd383ccc9b7d2 Mon Sep 17 00:00:00 2001 From: Konstantin Bogdanov Date: Thu, 25 Jul 2024 15:25:33 +0200 Subject: [PATCH 193/661] Fix harder --- src/AggregateFunctions/SingleValueData.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/AggregateFunctions/SingleValueData.cpp b/src/AggregateFunctions/SingleValueData.cpp index 996e64b22e0..566b40253a3 100644 --- a/src/AggregateFunctions/SingleValueData.cpp +++ b/src/AggregateFunctions/SingleValueData.cpp 
@@ -195,7 +195,7 @@ bool SingleValueDataFixed::isEqualTo(const IColumn & column, size_t index) co template bool SingleValueDataFixed::isEqualTo(const SingleValueDataFixed & to) const { - return has() && to.value == value; + return has() && to.has() && to.value == value; } template @@ -905,7 +905,7 @@ template bool SingleValueDataNumeric::isEqualTo(const DB::SingleValueDataBase & to) const { auto const & other = assert_cast(to); - return memory.get().isEqualTo(other.memory.get()); + return to.has() && memory.get().isEqualTo(other.memory.get()); } template @@ -1291,7 +1291,7 @@ bool SingleValueDataGeneric::isEqualTo(const IColumn & column, size_t row_num) c bool SingleValueDataGeneric::isEqualTo(const DB::SingleValueDataBase & other) const { auto const & to = assert_cast(other); - return has() && to.value == value; + return has() && to.has() && to.value == value; } void SingleValueDataGeneric::set(const IColumn & column, size_t row_num, Arena *) From 64eeece5331c36c4f2b1c00c2bc229e40cd74d63 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Thu, 25 Jul 2024 15:49:59 +0200 Subject: [PATCH 194/661] fix --- tests/queries/0_stateless/02446_parent_zero_copy_locks.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/02446_parent_zero_copy_locks.sql b/tests/queries/0_stateless/02446_parent_zero_copy_locks.sql index 1cae8ae0237..a44322e02cf 100644 --- a/tests/queries/0_stateless/02446_parent_zero_copy_locks.sql +++ b/tests/queries/0_stateless/02446_parent_zero_copy_locks.sql @@ -36,8 +36,8 @@ select 1, * from rmt1 order by n; system sync replica rmt1; select 2, * from rmt2 order by n; --- wait for outdated parts to be removed -select throwIf(count() = 0) from ( +-- wait for outdated parts to be removed (do not ignore _state column, so it will count Deleting parts as well) +select throwIf(count() = 0), groupArray(_state) from ( select *, _state from system.parts where database=currentDatabase() and table like 'rmt%' and active=0 ) format Null; -- { retry 30 until serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } From f654db215ffb961010763c0daf6484fa75e4fd6b Mon Sep 17 00:00:00 2001 From: Konstantin Bogdanov Date: Thu, 25 Jul 2024 15:56:29 +0200 Subject: [PATCH 195/661] Fix naming --- ....reference => 03210_fix_single_value_data_assertion.reference} | 0 ...ta-assertion.sql => 03210_fix_single_value_data_assertion.sql} | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename tests/queries/0_stateless/{03210_fix-single-value-data-assertion.reference => 03210_fix_single_value_data_assertion.reference} (100%) rename tests/queries/0_stateless/{03210_fix-single-value-data-assertion.sql => 03210_fix_single_value_data_assertion.sql} (100%) diff --git a/tests/queries/0_stateless/03210_fix-single-value-data-assertion.reference b/tests/queries/0_stateless/03210_fix_single_value_data_assertion.reference similarity index 100% rename from tests/queries/0_stateless/03210_fix-single-value-data-assertion.reference rename to tests/queries/0_stateless/03210_fix_single_value_data_assertion.reference diff --git a/tests/queries/0_stateless/03210_fix-single-value-data-assertion.sql b/tests/queries/0_stateless/03210_fix_single_value_data_assertion.sql similarity index 100% rename from tests/queries/0_stateless/03210_fix-single-value-data-assertion.sql rename to tests/queries/0_stateless/03210_fix_single_value_data_assertion.sql From 4b505badd3566cf2b47681c667ee134699cf2764 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Thu, 25 Jul 2024 14:11:41 +0000 Subject: [PATCH 
196/661] Attempt to fix flakiness of some window view tests --- .../queries/0_stateless/01052_window_view_proc_tumble_to_now.sh | 1 + tests/queries/0_stateless/01053_window_view_proc_hop_to_now.sh | 1 + tests/queries/0_stateless/01054_window_view_proc_tumble_to.sh | 1 + tests/queries/0_stateless/01055_window_view_proc_hop_to.sh | 1 + .../0_stateless/01075_window_view_proc_tumble_to_now_populate.sh | 1 + 5 files changed, 5 insertions(+) diff --git a/tests/queries/0_stateless/01052_window_view_proc_tumble_to_now.sh b/tests/queries/0_stateless/01052_window_view_proc_tumble_to_now.sh index 4325ebeed24..5c70806ea7b 100755 --- a/tests/queries/0_stateless/01052_window_view_proc_tumble_to_now.sh +++ b/tests/queries/0_stateless/01052_window_view_proc_tumble_to_now.sh @@ -1,4 +1,5 @@ #!/usr/bin/env bash +# Tags: no-random-settings, no-parallel CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/01053_window_view_proc_hop_to_now.sh b/tests/queries/0_stateless/01053_window_view_proc_hop_to_now.sh index 8e28995980f..32c9c52ab09 100755 --- a/tests/queries/0_stateless/01053_window_view_proc_hop_to_now.sh +++ b/tests/queries/0_stateless/01053_window_view_proc_hop_to_now.sh @@ -1,4 +1,5 @@ #!/usr/bin/env bash +# Tags: no-random-settings, no-parallel CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/01054_window_view_proc_tumble_to.sh b/tests/queries/0_stateless/01054_window_view_proc_tumble_to.sh index ee11b265ecd..ba566bb4ae6 100755 --- a/tests/queries/0_stateless/01054_window_view_proc_tumble_to.sh +++ b/tests/queries/0_stateless/01054_window_view_proc_tumble_to.sh @@ -1,4 +1,5 @@ #!/usr/bin/env bash +# Tags: no-random-settings, no-parallel CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/01055_window_view_proc_hop_to.sh b/tests/queries/0_stateless/01055_window_view_proc_hop_to.sh index ea8ad372617..0db4173b3dc 100755 --- a/tests/queries/0_stateless/01055_window_view_proc_hop_to.sh +++ b/tests/queries/0_stateless/01055_window_view_proc_hop_to.sh @@ -1,4 +1,5 @@ #!/usr/bin/env bash +# Tags: no-random-settings, no-parallel CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/01075_window_view_proc_tumble_to_now_populate.sh b/tests/queries/0_stateless/01075_window_view_proc_tumble_to_now_populate.sh index f7842af4dad..67c249a9d0e 100755 --- a/tests/queries/0_stateless/01075_window_view_proc_tumble_to_now_populate.sh +++ b/tests/queries/0_stateless/01075_window_view_proc_tumble_to_now_populate.sh @@ -1,4 +1,5 @@ #!/usr/bin/env bash +# Tags: no-random-settings, no-parallel CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh From baee31c12a68cd6e8f906a9224a39eea446a8f2b Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Thu, 25 Jul 2024 16:15:37 +0200 Subject: [PATCH 197/661] fix truncate database --- src/Interpreters/InterpreterDropQuery.cpp | 3 +-- tests/queries/0_stateless/02842_truncate_database.reference | 2 ++ tests/queries/0_stateless/02842_truncate_database.sql | 4 ++++ 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/src/Interpreters/InterpreterDropQuery.cpp b/src/Interpreters/InterpreterDropQuery.cpp index b68b3ddcd48..bad3e5277db 100644 --- a/src/Interpreters/InterpreterDropQuery.cpp +++ 
b/src/Interpreters/InterpreterDropQuery.cpp @@ -399,10 +399,9 @@ BlockIO InterpreterDropQuery::executeToDatabaseImpl(const ASTDropQuery & query, if (query.if_empty) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "DROP IF EMPTY is not implemented for databases"); - if (database->hasReplicationThread()) + if (!truncate && database->hasReplicationThread()) database->stopReplication(); - if (database->shouldBeEmptyOnDetach()) { /// Cancel restarting replicas in that database, wait for remaining RESTART queries to finish. diff --git a/tests/queries/0_stateless/02842_truncate_database.reference b/tests/queries/0_stateless/02842_truncate_database.reference index 71f52bcd1da..bc8c0210d27 100644 --- a/tests/queries/0_stateless/02842_truncate_database.reference +++ b/tests/queries/0_stateless/02842_truncate_database.reference @@ -20,3 +20,5 @@ source_table_stripe_log source_table_tiny_log === DICTIONARIES IN test_truncate_database === dest_dictionary +new tables +new_table diff --git a/tests/queries/0_stateless/02842_truncate_database.sql b/tests/queries/0_stateless/02842_truncate_database.sql index 09ac844cfe2..be92108ccb8 100644 --- a/tests/queries/0_stateless/02842_truncate_database.sql +++ b/tests/queries/0_stateless/02842_truncate_database.sql @@ -73,4 +73,8 @@ SELECT * FROM dest_dictionary; -- {serverError UNKNOWN_TABLE} SHOW TABLES FROM test_truncate_database; SHOW DICTIONARIES FROM test_truncate_database; +CREATE TABLE new_table (x UInt16) ENGINE = ReplicatedMergeTree ORDER BY x; +select 'new tables'; +SHOW TABLES FROM test_truncate_database; + DROP DATABASE test_truncate_database; From 0642ed19b7c67e443be110f2a0f2d1f032ddd8d5 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 25 Jul 2024 14:17:45 +0000 Subject: [PATCH 198/661] Fixing more tests. --- src/Processors/QueryPlan/ReadFromMergeTree.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index bdb90abd326..e5aeb9686be 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -1525,7 +1525,7 @@ void ReadFromMergeTree::applyFilters(ActionDAGNodes added_filter_nodes) /// TODO: Get rid of filter_actions_dag in query_info after we move analysis of /// parallel replicas and unused shards into optimization, similar to projection analysis. if (filter_actions_dag) - query_info.filter_actions_dag = std::make_shared(std::move(*filter_actions_dag)); + query_info.filter_actions_dag = std::make_shared(filter_actions_dag->clone()); buildIndexes( indexes, From bd721950b0401f94be652c11015bd1985c283f3a Mon Sep 17 00:00:00 2001 From: maxvostrikov Date: Thu, 25 Jul 2024 16:24:17 +0200 Subject: [PATCH 199/661] squash! added somme tests in relation with https://github.com/ClickHouse/ClickHouse/pull/54881 with new behaviour when enable_named_columns_in_function_tuple=1 (default value) --- .../0_stateless/00309_formats.reference | Bin 18736 -> 18666 bytes tests/queries/0_stateless/00309_formats.sql | 4 ++-- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/00309_formats.reference b/tests/queries/0_stateless/00309_formats.reference index a63720618ba54c6cc456f3356512449322dc2e80..f3ea45520bb50fb936caf6724e9fedf3cdd00b75 100644 GIT binary patch delta 54 zcmdlmiSgA$#tkJN8myt7c_kJsE{P?nVj-mkIjI_X3Sc1}g%kylsHVzCEg+hFz{3v! D3NRA? 
delta 99 zcmaDgk#WN$#tkJNdTgPdc_n5hEG~&9sp27}1v#l2c?u9Q9fcGHkgz5f0|Z>xnrz4= o!o|P{;hxo=JjcUt@=alWVGScwGaYp!0|NtH14~^)V|7g~0E0RhwEzGB diff --git a/tests/queries/0_stateless/00309_formats.sql b/tests/queries/0_stateless/00309_formats.sql index 691fc6e7ab6..0366cdeea5c 100644 --- a/tests/queries/0_stateless/00309_formats.sql +++ b/tests/queries/0_stateless/00309_formats.sql @@ -12,5 +12,5 @@ SELECT number * 246 + 10 AS n, toDate('2000-01-01') + n AS d, range(n) AS arr, a SET enable_named_columns_in_function_tuple = 1; -SELECT number * 246 + 10 AS n, toDate('2000-01-01') + n AS d, (n, d) AS tuple FROM system.numbers LIMIT 2 FORMAT RowBinaryWithNamesAndTypes; -SELECT number * 246 + 10 AS n, toDate('2000-01-01') + n AS d, (n, d) AS tuple FROM system.numbers LIMIT 2 FORMAT TabSeparatedWithNamesAndTypes; +SELECT 36 AS n, toDate('2000-01-01') + n AS d, (n, d) AS tuple FROM system.numbers LIMIT 1 FORMAT RowBinaryWithNamesAndTypes; +SELECT number * 246 + 10 AS n, toDate('2000-01-01') + n AS d, (n, d) AS tuple FROM system.numbers LIMIT 1 FORMAT TabSeparatedWithNamesAndTypes; From 59f9c125044b6e56a3ded8034478eff79e930018 Mon Sep 17 00:00:00 2001 From: Pablo Marcos Date: Thu, 25 Jul 2024 14:37:47 +0000 Subject: [PATCH 200/661] Increase sleep time make sure there is a new failure The previous sleep was already adding +5s to make sure the TTL was properly applied, so we'd rather use the same value here instead of just 1s. --- tests/integration/test_storage_s3_queue/test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/integration/test_storage_s3_queue/test.py b/tests/integration/test_storage_s3_queue/test.py index 4348857acd3..2e339a9b5c9 100644 --- a/tests/integration/test_storage_s3_queue/test.py +++ b/tests/integration/test_storage_s3_queue/test.py @@ -871,7 +871,7 @@ def test_max_set_age(started_cluster): node.query(f"SELECT uniq(_path) from {dst_table_name}") ) - time.sleep(max_age + 5) + time.sleep(max_age + max_age / 2) expected_rows *= 2 wait_for_condition(lambda: get_count() == expected_rows) @@ -922,7 +922,7 @@ def test_max_set_age(started_cluster): ) ) - time.sleep(max_age + 1) + time.sleep(max_age + max_age / 2) assert failed_count + 2 <= get_object_storage_failures() From 1973458ae07a5cd519b7069451d2be5822a89bf7 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 25 Jul 2024 15:00:37 +0000 Subject: [PATCH 201/661] Update PlannerWindowFunctions --- src/Planner/Planner.cpp | 13 ++++++------- src/Planner/PlannerWindowFunctions.cpp | 15 ++------------- src/Planner/PlannerWindowFunctions.h | 2 +- 3 files changed, 9 insertions(+), 21 deletions(-) diff --git a/src/Planner/Planner.cpp b/src/Planner/Planner.cpp index fb721069e6e..968642dc9de 100644 --- a/src/Planner/Planner.cpp +++ b/src/Planner/Planner.cpp @@ -933,19 +933,19 @@ void addPreliminarySortOrDistinctOrLimitStepsIfNeeded(QueryPlan & query_plan, void addWindowSteps(QueryPlan & query_plan, const PlannerContextPtr & planner_context, - const WindowAnalysisResult & window_analysis_result) + WindowAnalysisResult & window_analysis_result) { const auto & query_context = planner_context->getQueryContext(); const auto & settings = query_context->getSettingsRef(); - const auto & window_descriptions = window_analysis_result.window_descriptions; - auto perm = sortWindowDescriptions(window_descriptions); + auto & window_descriptions = window_analysis_result.window_descriptions; + sortWindowDescriptions(window_descriptions); size_t window_descriptions_size = window_descriptions.size(); for (size_t i = 0; i < 
window_descriptions_size; ++i) { - const auto & window_description = window_descriptions[perm[i]]; + const auto & window_description = window_descriptions[i]; /** We don't need to sort again if the input from previous window already * has suitable sorting. Also don't create sort steps when there are no @@ -958,9 +958,8 @@ void addWindowSteps(QueryPlan & query_plan, bool need_sort = !window_description.full_sort_description.empty(); if (need_sort && i != 0) { - auto prev = perm[i - 1]; - need_sort = !sortDescriptionIsPrefix(window_description.full_sort_description, window_descriptions[prev].full_sort_description) - || (settings.max_threads != 1 && window_description.partition_by.size() != window_descriptions[prev].partition_by.size()); + need_sort = !sortDescriptionIsPrefix(window_description.full_sort_description, window_descriptions[i - 1].full_sort_description) + || (settings.max_threads != 1 && window_description.partition_by.size() != window_descriptions[i - 1].partition_by.size()); } if (need_sort) { diff --git a/src/Planner/PlannerWindowFunctions.cpp b/src/Planner/PlannerWindowFunctions.cpp index ba0e11df76b..225852de5a7 100644 --- a/src/Planner/PlannerWindowFunctions.cpp +++ b/src/Planner/PlannerWindowFunctions.cpp @@ -122,7 +122,7 @@ std::vector extractWindowDescriptions(const QueryTreeNodes & return result; } -std::vector sortWindowDescriptions(const std::vector & window_descriptions) +void sortWindowDescriptions(std::vector & window_descriptions) { auto window_description_comparator = [](const WindowDescription & lhs, const WindowDescription & rhs) { @@ -153,18 +153,7 @@ std::vector sortWindowDescriptions(const std::vector return left.size() > right.size(); }; - auto comparator = [&](size_t lhs, size_t rhs) - { - return window_description_comparator(window_descriptions[lhs], window_descriptions[rhs]); - }; - - std::vector perm(window_descriptions.size()); - for (size_t i = 0; i < perm.size(); ++i) - perm[i] = i; - - ::sort(perm.begin(), perm.end(), comparator); - - return perm; + ::sort(window_descriptions.begin(), window_descriptions.end(), window_description_comparator); } } diff --git a/src/Planner/PlannerWindowFunctions.h b/src/Planner/PlannerWindowFunctions.h index 3039ecefc4b..1552ef5a71f 100644 --- a/src/Planner/PlannerWindowFunctions.h +++ b/src/Planner/PlannerWindowFunctions.h @@ -15,6 +15,6 @@ std::vector extractWindowDescriptions(const QueryTreeNodes & /** Try to sort window descriptions in such an order that the window with the longest * sort description goes first, and all window that use its prefixes follow. 
*/ -std::vector sortWindowDescriptions(const std::vector & window_descriptions); +void sortWindowDescriptions(std::vector & window_descriptions); } From 5ea867231bafc01b4512989f351106b7afcc14af Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Thu, 25 Jul 2024 16:55:23 +0200 Subject: [PATCH 202/661] Read configuration for clickhouse-local from ~/.clickhouse-local Signed-off-by: Azat Khuzhin --- programs/local/LocalServer.cpp | 16 +++++++-- src/Common/Config/CMakeLists.txt | 1 + src/Common/Config/getLocalConfigPath.cpp | 46 ++++++++++++++++++++++++ src/Common/Config/getLocalConfigPath.h | 12 +++++++ 4 files changed, 73 insertions(+), 2 deletions(-) create mode 100644 src/Common/Config/getLocalConfigPath.cpp create mode 100644 src/Common/Config/getLocalConfigPath.h diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index 48e0cca7b73..ade4e0f49df 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -1,6 +1,7 @@ #include "LocalServer.h" #include +#include #include #include #include @@ -127,10 +128,21 @@ void LocalServer::initialize(Poco::Util::Application & self) { Poco::Util::Application::initialize(self); + const char * home_path_cstr = getenv("HOME"); // NOLINT(concurrency-mt-unsafe) + if (home_path_cstr) + home_path = home_path_cstr; + /// Load config files if exists - if (getClientConfiguration().has("config-file") || fs::exists("config.xml")) + std::string config_path; + if (getClientConfiguration().has("config-file")) + config_path = getClientConfiguration().getString("config-file"); + else if (config_path.empty() && fs::exists("config.xml")) + config_path = "config.xml"; + else if (config_path.empty()) + config_path = getLocalConfigPath(home_path).value_or(""); + + if (fs::exists(config_path)) { - const auto config_path = getClientConfiguration().getString("config-file", "config.xml"); ConfigProcessor config_processor(config_path, false, true); ConfigProcessor::setConfigPath(fs::path(config_path).parent_path()); auto loaded_config = config_processor.loadConfig(); diff --git a/src/Common/Config/CMakeLists.txt b/src/Common/Config/CMakeLists.txt index 09095ef5acc..2bd32b98bda 100644 --- a/src/Common/Config/CMakeLists.txt +++ b/src/Common/Config/CMakeLists.txt @@ -2,6 +2,7 @@ set (SRCS AbstractConfigurationComparison.cpp ConfigProcessor.cpp getClientConfigPath.cpp + getLocalConfigPath.cpp ConfigReloader.cpp YAMLParser.cpp ConfigHelper.cpp diff --git a/src/Common/Config/getLocalConfigPath.cpp b/src/Common/Config/getLocalConfigPath.cpp new file mode 100644 index 00000000000..afaa7f79026 --- /dev/null +++ b/src/Common/Config/getLocalConfigPath.cpp @@ -0,0 +1,46 @@ +#include + +#include +#include + + +namespace fs = std::filesystem; + +namespace DB +{ + +std::optional getLocalConfigPath(const std::string & home_path) +{ + std::string config_path; + bool found = false; + + std::vector names; + names.emplace_back("./clickhouse-local"); + if (!home_path.empty()) + names.emplace_back(home_path + "/.clickhouse-local/config"); + names.emplace_back("/etc/clickhouse-local/config"); + + for (const auto & name : names) + { + for (const auto & extension : {".xml", ".yaml", ".yml"}) + { + config_path = name + extension; + + std::error_code ec; + if (fs::exists(config_path, ec)) + { + found = true; + break; + } + } + if (found) + break; + } + + if (found) + return config_path; + + return std::nullopt; +} + +} diff --git a/src/Common/Config/getLocalConfigPath.h b/src/Common/Config/getLocalConfigPath.h new file mode 100644 index 
00000000000..14625571d6c --- /dev/null +++ b/src/Common/Config/getLocalConfigPath.h @@ -0,0 +1,12 @@ +#pragma once + +#include +#include + +namespace DB +{ + +/// Return path to existing configuration file. +std::optional getLocalConfigPath(const std::string & home_path); + +} From fb271436a1efe969f4de09b14aec942baa145cb9 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 25 Jul 2024 15:37:13 +0000 Subject: [PATCH 203/661] Remove ActionsDAGPtr completely. --- src/Interpreters/ActionsDAG.h | 3 --- src/Interpreters/evaluateConstantExpression.cpp | 4 ++-- .../optimizePrimaryKeyConditionAndLimit.cpp | 6 +++--- src/Processors/QueryPlan/SourceStepWithFilter.h | 6 +++--- src/Processors/QueryPlan/TotalsHavingStep.h | 3 --- src/Processors/QueryPlan/WindowStep.h | 3 --- src/Storages/MergeTree/KeyCondition.cpp | 14 +++++++------- src/Storages/MergeTree/KeyCondition.h | 2 +- .../MergeTreeSplitPrewhereIntoReadSteps.cpp | 3 +++ src/Storages/SelectQueryInfo.h | 3 --- src/Storages/StorageMerge.cpp | 2 +- 11 files changed, 20 insertions(+), 29 deletions(-) diff --git a/src/Interpreters/ActionsDAG.h b/src/Interpreters/ActionsDAG.h index 76cc9327530..43c1b41a240 100644 --- a/src/Interpreters/ActionsDAG.h +++ b/src/Interpreters/ActionsDAG.h @@ -11,9 +11,6 @@ namespace DB { -class ActionsDAG; -using ActionsDAGPtr = std::unique_ptr; - class IExecutableFunction; using ExecutableFunctionPtr = std::shared_ptr; diff --git a/src/Interpreters/evaluateConstantExpression.cpp b/src/Interpreters/evaluateConstantExpression.cpp index 42d6f3d3037..4bfc80af1fe 100644 --- a/src/Interpreters/evaluateConstantExpression.cpp +++ b/src/Interpreters/evaluateConstantExpression.cpp @@ -679,9 +679,9 @@ std::optional evaluateExpressionOverConstantCondition( size_t max_elements) { auto inverted_dag = KeyCondition::cloneASTWithInversionPushDown({predicate}, context); - auto matches = matchTrees(expr, *inverted_dag, false); + auto matches = matchTrees(expr, inverted_dag, false); - auto predicates = analyze(inverted_dag->getOutputs().at(0), matches, context, max_elements); + auto predicates = analyze(inverted_dag.getOutputs().at(0), matches, context, max_elements); if (!predicates) return {}; diff --git a/src/Processors/QueryPlan/Optimizations/optimizePrimaryKeyConditionAndLimit.cpp b/src/Processors/QueryPlan/Optimizations/optimizePrimaryKeyConditionAndLimit.cpp index 63b4e019066..f53212407d2 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizePrimaryKeyConditionAndLimit.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizePrimaryKeyConditionAndLimit.cpp @@ -18,16 +18,16 @@ void optimizePrimaryKeyConditionAndLimit(const Stack & stack) const auto & storage_prewhere_info = source_step_with_filter->getPrewhereInfo(); if (storage_prewhere_info) { - source_step_with_filter->addFilter(std::make_unique(storage_prewhere_info->prewhere_actions->clone()), storage_prewhere_info->prewhere_column_name); + source_step_with_filter->addFilter(storage_prewhere_info->prewhere_actions->clone(), storage_prewhere_info->prewhere_column_name); if (storage_prewhere_info->row_level_filter) - source_step_with_filter->addFilter(std::make_unique(storage_prewhere_info->row_level_filter->clone()), storage_prewhere_info->row_level_column_name); + source_step_with_filter->addFilter(storage_prewhere_info->row_level_filter->clone(), storage_prewhere_info->row_level_column_name); } for (auto iter = stack.rbegin() + 1; iter != stack.rend(); ++iter) { if (auto * filter_step = typeid_cast(iter->node->step.get())) { - 
source_step_with_filter->addFilter(std::make_unique(filter_step->getExpression().clone()), filter_step->getFilterColumnName()); + source_step_with_filter->addFilter(filter_step->getExpression().clone(), filter_step->getFilterColumnName()); } else if (auto * limit_step = typeid_cast(iter->node->step.get())) { diff --git a/src/Processors/QueryPlan/SourceStepWithFilter.h b/src/Processors/QueryPlan/SourceStepWithFilter.h index f7a030c0628..6cea5fd7245 100644 --- a/src/Processors/QueryPlan/SourceStepWithFilter.h +++ b/src/Processors/QueryPlan/SourceStepWithFilter.h @@ -45,9 +45,9 @@ public: const Names & requiredSourceColumns() const { return required_source_columns; } - void addFilter(ActionsDAGPtr filter_dag, std::string column_name) + void addFilter(ActionsDAG filter_dag, std::string column_name) { - filter_nodes.nodes.push_back(&filter_dag->findInOutputs(column_name)); + filter_nodes.nodes.push_back(&filter_dag.findInOutputs(column_name)); filter_dags.push_back(std::move(filter_dag)); } @@ -86,7 +86,7 @@ protected: private: /// Will be cleared after applyFilters() is called. ActionDAGNodes filter_nodes; - std::vector filter_dags; + std::vector filter_dags; }; } diff --git a/src/Processors/QueryPlan/TotalsHavingStep.h b/src/Processors/QueryPlan/TotalsHavingStep.h index 927b8d99de3..4b414d41c57 100644 --- a/src/Processors/QueryPlan/TotalsHavingStep.h +++ b/src/Processors/QueryPlan/TotalsHavingStep.h @@ -6,9 +6,6 @@ namespace DB { -class ActionsDAG; -using ActionsDAGPtr = std::unique_ptr; - enum class TotalsMode : uint8_t; /// Execute HAVING and calculate totals. See TotalsHavingTransform. diff --git a/src/Processors/QueryPlan/WindowStep.h b/src/Processors/QueryPlan/WindowStep.h index 47883e5edf6..d79cd7fd45e 100644 --- a/src/Processors/QueryPlan/WindowStep.h +++ b/src/Processors/QueryPlan/WindowStep.h @@ -6,9 +6,6 @@ namespace DB { -class ActionsDAG; -using ActionsDAGPtr = std::unique_ptr; - class WindowTransform; class WindowStep : public ITransformingStep diff --git a/src/Storages/MergeTree/KeyCondition.cpp b/src/Storages/MergeTree/KeyCondition.cpp index 0eb59a47cae..69bffac9160 100644 --- a/src/Storages/MergeTree/KeyCondition.cpp +++ b/src/Storages/MergeTree/KeyCondition.cpp @@ -696,22 +696,22 @@ const std::unordered_map KeyConditi {"hilbertEncode", SpaceFillingCurveType::Hilbert} }; -ActionsDAGPtr KeyCondition::cloneASTWithInversionPushDown(ActionsDAG::NodeRawConstPtrs nodes, const ContextPtr & context) +ActionsDAG KeyCondition::cloneASTWithInversionPushDown(ActionsDAG::NodeRawConstPtrs nodes, const ContextPtr & context) { - auto res = std::make_unique(); + ActionsDAG res; std::unordered_map to_inverted; for (auto & node : nodes) - node = &DB::cloneASTWithInversionPushDown(*node, *res, to_inverted, context, false); + node = &DB::cloneASTWithInversionPushDown(*node, res, to_inverted, context, false); if (nodes.size() > 1) { auto function_builder = FunctionFactory::instance().get("and", context); - nodes = {&res->addFunction(function_builder, std::move(nodes), "")}; + nodes = {&res.addFunction(function_builder, std::move(nodes), "")}; } - res->getOutputs().swap(nodes); + res.getOutputs().swap(nodes); return res; } @@ -826,9 +826,9 @@ KeyCondition::KeyCondition( * are pushed down and applied (when possible) to leaf nodes. 
*/ auto inverted_dag = cloneASTWithInversionPushDown({filter_dag->getOutputs().at(0)}, context); - assert(inverted_dag->getOutputs().size() == 1); + assert(inverted_dag.getOutputs().size() == 1); - const auto * inverted_dag_filter_node = inverted_dag->getOutputs()[0]; + const auto * inverted_dag_filter_node = inverted_dag.getOutputs()[0]; RPNBuilder builder(inverted_dag_filter_node, context, [&](const RPNBuilderTreeNode & node, RPNElement & out) { diff --git a/src/Storages/MergeTree/KeyCondition.h b/src/Storages/MergeTree/KeyCondition.h index a9e1a589ba5..e9343ec08ea 100644 --- a/src/Storages/MergeTree/KeyCondition.h +++ b/src/Storages/MergeTree/KeyCondition.h @@ -134,7 +134,7 @@ public: DataTypePtr current_type, bool single_point = false); - static ActionsDAGPtr cloneASTWithInversionPushDown(ActionsDAG::NodeRawConstPtrs nodes, const ContextPtr & context); + static ActionsDAG cloneASTWithInversionPushDown(ActionsDAG::NodeRawConstPtrs nodes, const ContextPtr & context); bool matchesExactContinuousRange() const; diff --git a/src/Storages/MergeTree/MergeTreeSplitPrewhereIntoReadSteps.cpp b/src/Storages/MergeTree/MergeTreeSplitPrewhereIntoReadSteps.cpp index 116edf5b9cb..1d0569e0df6 100644 --- a/src/Storages/MergeTree/MergeTreeSplitPrewhereIntoReadSteps.cpp +++ b/src/Storages/MergeTree/MergeTreeSplitPrewhereIntoReadSteps.cpp @@ -10,6 +10,9 @@ namespace DB { +class ActionsDAG; +using ActionsDAGPtr = std::unique_ptr; + namespace ErrorCodes { extern const int LOGICAL_ERROR; diff --git a/src/Storages/SelectQueryInfo.h b/src/Storages/SelectQueryInfo.h index 60f103fdb70..1c4cb7d92d8 100644 --- a/src/Storages/SelectQueryInfo.h +++ b/src/Storages/SelectQueryInfo.h @@ -18,9 +18,6 @@ namespace DB class ExpressionActions; using ExpressionActionsPtr = std::shared_ptr; -class ActionsDAG; -using ActionsDAGPtr = std::unique_ptr; - struct PrewhereInfo; using PrewhereInfoPtr = std::shared_ptr; diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index 0e1568c8e79..e5de15c1d21 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -1279,7 +1279,7 @@ void ReadFromMerge::RowPolicyData::extendNames(Names & names) const void ReadFromMerge::RowPolicyData::addStorageFilter(SourceStepWithFilter * step) const { - step->addFilter(std::make_unique(actions_dag.clone()), filter_column_name); + step->addFilter(actions_dag.clone(), filter_column_name); } void ReadFromMerge::RowPolicyData::addFilterTransform(QueryPlan & plan) const From ccd92d20821903123d4748027cc2248095b34efa Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 25 Jul 2024 17:44:26 +0200 Subject: [PATCH 204/661] Update chassert in cache --- src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp b/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp index 198f6c0ea04..c928d25c7b8 100644 --- a/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp +++ b/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp @@ -810,6 +810,7 @@ bool CachedOnDiskReadBufferFromFile::nextImplStep() { last_caller_id = FileSegment::getCallerId(); + chassert(file_offset_of_buffer_end <= read_until_position); if (file_offset_of_buffer_end == read_until_position) return false; @@ -1051,7 +1052,11 @@ bool CachedOnDiskReadBufferFromFile::nextImplStep() if (download_current_segment && download_current_segment_succeeded) chassert(file_segment.getCurrentWriteOffset() >= file_offset_of_buffer_end); - chassert(file_offset_of_buffer_end <= 
read_until_position); + + chassert( + file_offset_of_buffer_end <= read_until_position, + fmt::format("Expected {} <= {} (size: {}, read range: {})", + file_offset_of_buffer_end, read_until_position, size, current_read_range.toString())); } swap(*implementation_buffer); From e199fbaeaadd05b28e9dee1265fc813b081071f1 Mon Sep 17 00:00:00 2001 From: Sema Checherinda <104093494+CheSema@users.noreply.github.com> Date: Thu, 25 Jul 2024 17:55:32 +0200 Subject: [PATCH 205/661] Update tests/queries/0_stateless/01171_mv_select_insert_isolation_long.sh --- .../0_stateless/01171_mv_select_insert_isolation_long.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01171_mv_select_insert_isolation_long.sh b/tests/queries/0_stateless/01171_mv_select_insert_isolation_long.sh index 620281ee972..13aa64d3cbe 100755 --- a/tests/queries/0_stateless/01171_mv_select_insert_isolation_long.sh +++ b/tests/queries/0_stateless/01171_mv_select_insert_isolation_long.sh @@ -168,7 +168,7 @@ fi START_TIME=$(get_now) STOP_TIME=$((START_TIME + MAIN_TIME_PART)) SECOND_STOP_TIME=$((STOP_TIME + SECOND_TIME_PART)) -MIN_ITERATIONS=25 +MIN_ITERATIONS=20 run_until_deadline_and_at_least_times $STOP_TIME $MIN_ITERATIONS insert_commit_action 1 & PID_1=$! run_until_deadline_and_at_least_times $STOP_TIME $MIN_ITERATIONS insert_commit_action 2 & PID_2=$! From 869f6a6f105f50aa4d0e71e6440646b78539f0ff Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 25 Jul 2024 16:33:12 +0000 Subject: [PATCH 206/661] Updating PrewhereInfo --- src/Interpreters/ExpressionAnalyzer.cpp | 9 ++++----- src/Interpreters/InterpreterSelectQuery.cpp | 16 ++++++++-------- src/Planner/PlannerJoinTree.cpp | 8 +++----- .../QueryPlan/Optimizations/optimizePrewhere.cpp | 8 ++++---- .../optimizePrimaryKeyConditionAndLimit.cpp | 2 +- .../Optimizations/optimizeReadInOrder.cpp | 12 +++++------- .../Optimizations/projectionsCommon.cpp | 15 ++++++--------- src/Processors/QueryPlan/ReadFromMergeTree.cpp | 12 ++++-------- .../QueryPlan/SourceStepWithFilter.cpp | 9 +++------ src/Storages/IStorage.cpp | 3 +-- .../MergeTree/MergeTreePrefetchedReadPool.cpp | 2 +- src/Storages/MergeTree/MergeTreeReadPoolBase.cpp | 2 +- .../MergeTree/MergeTreeSelectProcessor.cpp | 4 ++-- .../MergeTreeSplitPrewhereIntoReadSteps.cpp | 6 +++--- src/Storages/SelectQueryInfo.h | 7 +++---- src/Storages/StorageBuffer.cpp | 7 +++---- 16 files changed, 52 insertions(+), 70 deletions(-) diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index 5972d89bddd..d25434a515d 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -2230,12 +2230,11 @@ void ExpressionAnalysisResult::checkActions() const /// Check that PREWHERE doesn't contain unusual actions. Unusual actions are that can change number of rows. 
if (hasPrewhere()) { - auto check_actions = [](const std::optional & actions) + auto check_actions = [](ActionsDAG & actions) { - if (actions) - for (const auto & node : actions->getNodes()) - if (node.type == ActionsDAG::ActionType::ARRAY_JOIN) - throw Exception(ErrorCodes::ILLEGAL_PREWHERE, "PREWHERE cannot contain ARRAY JOIN action"); + for (const auto & node : actions.getNodes()) + if (node.type == ActionsDAG::ActionType::ARRAY_JOIN) + throw Exception(ErrorCodes::ILLEGAL_PREWHERE, "PREWHERE cannot contain ARRAY JOIN action"); }; check_actions(prewhere_info->prewhere_actions); diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 174e6b5b0e0..4fd6f7a2900 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -937,7 +937,7 @@ bool InterpreterSelectQuery::adjustParallelReplicasAfterAnalysis() { { const auto & node - = query_info_copy.prewhere_info->prewhere_actions->findInOutputs(query_info_copy.prewhere_info->prewhere_column_name); + = query_info_copy.prewhere_info->prewhere_actions.findInOutputs(query_info_copy.prewhere_info->prewhere_column_name); added_filter_nodes.nodes.push_back(&node); } @@ -1058,7 +1058,7 @@ Block InterpreterSelectQuery::getSampleBlockImpl() if (analysis_result.prewhere_info) { - header = analysis_result.prewhere_info->prewhere_actions->updateHeader(header); + header = analysis_result.prewhere_info->prewhere_actions.updateHeader(header); if (analysis_result.prewhere_info->remove_prewhere_column) header.erase(analysis_result.prewhere_info->prewhere_column_name); } @@ -1521,7 +1521,7 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional

( query_plan.getCurrentDataStream(), - expressions.prewhere_info->prewhere_actions->clone(), + expressions.prewhere_info->prewhere_actions.clone(), expressions.prewhere_info->prewhere_column_name, expressions.prewhere_info->remove_prewhere_column); @@ -2066,7 +2066,7 @@ void InterpreterSelectQuery::addEmptySourceToQueryPlan(QueryPlan & query_plan, c }); } - auto filter_actions = std::make_shared(prewhere_info.prewhere_actions->clone()); + auto filter_actions = std::make_shared(prewhere_info.prewhere_actions.clone()); pipe.addSimpleTransform([&](const Block & header) { return std::make_shared( @@ -2157,7 +2157,7 @@ void InterpreterSelectQuery::addPrewhereAliasActions() if (prewhere_info) { /// Get some columns directly from PREWHERE expression actions - auto prewhere_required_columns = prewhere_info->prewhere_actions->getRequiredColumns().getNames(); + auto prewhere_required_columns = prewhere_info->prewhere_actions.getRequiredColumns().getNames(); columns.insert(prewhere_required_columns.begin(), prewhere_required_columns.end()); if (prewhere_info->row_level_filter) @@ -2229,7 +2229,7 @@ void InterpreterSelectQuery::addPrewhereAliasActions() if (prewhere_info) { NameSet columns_to_remove(columns_to_remove_after_prewhere.begin(), columns_to_remove_after_prewhere.end()); - Block prewhere_actions_result = prewhere_info->prewhere_actions->getResultColumns(); + Block prewhere_actions_result = prewhere_info->prewhere_actions.getResultColumns(); /// Populate required columns with the columns, added by PREWHERE actions and not removed afterwards. /// XXX: looks hacky that we already know which columns after PREWHERE we won't need for sure. @@ -2268,7 +2268,7 @@ void InterpreterSelectQuery::addPrewhereAliasActions() { /// Don't remove columns which are needed to be aliased. for (const auto & name : required_columns) - prewhere_info->prewhere_actions->tryRestoreColumn(name); + prewhere_info->prewhere_actions.tryRestoreColumn(name); /// Add physical columns required by prewhere actions. 
for (const auto & column : required_columns_from_prewhere) @@ -2326,7 +2326,7 @@ std::optional InterpreterSelectQuery::getTrivialCount(UInt64 max_paralle if (analysis_result.hasPrewhere()) { auto & prewhere_info = analysis_result.prewhere_info; - filter_nodes.push_back(&prewhere_info->prewhere_actions->findInOutputs(prewhere_info->prewhere_column_name)); + filter_nodes.push_back(&prewhere_info->prewhere_actions.findInOutputs(prewhere_info->prewhere_column_name)); if (prewhere_info->row_level_filter) filter_nodes.push_back(&prewhere_info->row_level_filter->findInOutputs(prewhere_info->row_level_column_name)); diff --git a/src/Planner/PlannerJoinTree.cpp b/src/Planner/PlannerJoinTree.cpp index e9f886ab162..a3db0395ccc 100644 --- a/src/Planner/PlannerJoinTree.cpp +++ b/src/Planner/PlannerJoinTree.cpp @@ -437,7 +437,7 @@ void updatePrewhereOutputsIfNeeded(SelectQueryInfo & table_expression_query_info std::unordered_set required_output_nodes; - for (const auto * input : prewhere_actions->getInputs()) + for (const auto * input : prewhere_actions.getInputs()) { if (required_columns.contains(input->result_name)) required_output_nodes.insert(input); @@ -446,7 +446,7 @@ void updatePrewhereOutputsIfNeeded(SelectQueryInfo & table_expression_query_info if (required_output_nodes.empty()) return; - auto & prewhere_outputs = prewhere_actions->getOutputs(); + auto & prewhere_outputs = prewhere_actions.getOutputs(); for (const auto & output : prewhere_outputs) { auto required_output_node_it = required_output_nodes.find(output); @@ -801,10 +801,8 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres if (storage->canMoveConditionsToPrewhere() && optimize_move_to_prewhere && (!supported_prewhere_columns || supported_prewhere_columns->contains(filter_info.column_name))) { if (!prewhere_info) - prewhere_info = std::make_shared(); - - if (!prewhere_info->prewhere_actions) { + prewhere_info = std::make_shared(); prewhere_info->prewhere_actions = std::move(filter_info.actions); prewhere_info->prewhere_column_name = filter_info.column_name; prewhere_info->remove_prewhere_column = filter_info.do_remove_column; diff --git a/src/Processors/QueryPlan/Optimizations/optimizePrewhere.cpp b/src/Processors/QueryPlan/Optimizations/optimizePrewhere.cpp index 5711189136c..dc73521210a 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizePrewhere.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizePrewhere.cpp @@ -56,7 +56,7 @@ void optimizePrewhere(Stack & stack, QueryPlan::Nodes &) return; const auto & storage_prewhere_info = source_step_with_filter->getPrewhereInfo(); - if (storage_prewhere_info && storage_prewhere_info->prewhere_actions) + if (storage_prewhere_info) return; /// TODO: We can also check for UnionStep, such as StorageBuffer and local distributed plans. 
@@ -165,16 +165,16 @@ void optimizePrewhere(Stack & stack, QueryPlan::Nodes &) { prewhere_info->prewhere_column_name = conditions.front()->result_name; if (prewhere_info->remove_prewhere_column) - prewhere_info->prewhere_actions->getOutputs().push_back(conditions.front()); + prewhere_info->prewhere_actions.getOutputs().push_back(conditions.front()); } else { prewhere_info->remove_prewhere_column = true; FunctionOverloadResolverPtr func_builder_and = std::make_unique(std::make_shared()); - const auto * node = &prewhere_info->prewhere_actions->addFunction(func_builder_and, std::move(conditions), {}); + const auto * node = &prewhere_info->prewhere_actions.addFunction(func_builder_and, std::move(conditions), {}); prewhere_info->prewhere_column_name = node->result_name; - prewhere_info->prewhere_actions->getOutputs().push_back(node); + prewhere_info->prewhere_actions.getOutputs().push_back(node); } source_step_with_filter->updatePrewhereInfo(prewhere_info); diff --git a/src/Processors/QueryPlan/Optimizations/optimizePrimaryKeyConditionAndLimit.cpp b/src/Processors/QueryPlan/Optimizations/optimizePrimaryKeyConditionAndLimit.cpp index f53212407d2..490b79fbf8d 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizePrimaryKeyConditionAndLimit.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizePrimaryKeyConditionAndLimit.cpp @@ -18,7 +18,7 @@ void optimizePrimaryKeyConditionAndLimit(const Stack & stack) const auto & storage_prewhere_info = source_step_with_filter->getPrewhereInfo(); if (storage_prewhere_info) { - source_step_with_filter->addFilter(storage_prewhere_info->prewhere_actions->clone(), storage_prewhere_info->prewhere_column_name); + source_step_with_filter->addFilter(storage_prewhere_info->prewhere_actions.clone(), storage_prewhere_info->prewhere_column_name); if (storage_prewhere_info->row_level_filter) source_step_with_filter->addFilter(storage_prewhere_info->row_level_filter->clone(), storage_prewhere_info->row_level_column_name); } diff --git a/src/Processors/QueryPlan/Optimizations/optimizeReadInOrder.cpp b/src/Processors/QueryPlan/Optimizations/optimizeReadInOrder.cpp index 252420e19fe..99df6da263f 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizeReadInOrder.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizeReadInOrder.cpp @@ -191,13 +191,11 @@ void buildSortingDAG(QueryPlan::Node & node, std::optional & dag, Fi /// Should ignore limit if there is filtering. 
limit = 0; - if (prewhere_info->prewhere_actions) - { - //std::cerr << "====== Adding prewhere " << std::endl; - appendExpression(dag, *prewhere_info->prewhere_actions); - if (const auto * filter_expression = dag->tryFindInOutputs(prewhere_info->prewhere_column_name)) - appendFixedColumnsFromFilterExpression(*filter_expression, fixed_columns); - } + //std::cerr << "====== Adding prewhere " << std::endl; + appendExpression(dag, prewhere_info->prewhere_actions); + if (const auto * filter_expression = dag->tryFindInOutputs(prewhere_info->prewhere_column_name)) + appendFixedColumnsFromFilterExpression(*filter_expression, fixed_columns); + } return; } diff --git a/src/Processors/QueryPlan/Optimizations/projectionsCommon.cpp b/src/Processors/QueryPlan/Optimizations/projectionsCommon.cpp index 571d1dd0cc1..7414d479cc9 100644 --- a/src/Processors/QueryPlan/Optimizations/projectionsCommon.cpp +++ b/src/Processors/QueryPlan/Optimizations/projectionsCommon.cpp @@ -128,15 +128,12 @@ bool QueryDAG::buildImpl(QueryPlan::Node & node, ActionsDAG::NodeRawConstPtrs & return false; } - if (prewhere_info->prewhere_actions) - { - appendExpression(*prewhere_info->prewhere_actions); - if (const auto * filter_expression - = findInOutputs(*dag, prewhere_info->prewhere_column_name, prewhere_info->remove_prewhere_column)) - filter_nodes.push_back(filter_expression); - else - return false; - } + appendExpression(prewhere_info->prewhere_actions); + if (const auto * filter_expression + = findInOutputs(*dag, prewhere_info->prewhere_column_name, prewhere_info->remove_prewhere_column)) + filter_nodes.push_back(filter_expression); + else + return false; } return true; } diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index e5aeb9686be..483876dd293 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -109,8 +109,7 @@ bool restorePrewhereInputs(PrewhereInfo & info, const NameSet & inputs) if (info.row_level_filter) added = added || restoreDAGInputs(*info.row_level_filter, inputs); - if (info.prewhere_actions) - added = added || restoreDAGInputs(*info.prewhere_actions, inputs); + added = added || restoreDAGInputs(info.prewhere_actions, inputs); return added; } @@ -175,9 +174,8 @@ static void updateSortDescriptionForOutputStream( Block original_header = output_stream.header.cloneEmpty(); if (prewhere_info) { - if (prewhere_info->prewhere_actions) { - FindOriginalNodeForOutputName original_column_finder(*prewhere_info->prewhere_actions); + FindOriginalNodeForOutputName original_column_finder(prewhere_info->prewhere_actions); for (auto & column : original_header) { const auto * original_node = original_column_finder.find(column.name); @@ -2131,7 +2129,6 @@ void ReadFromMergeTree::describeActions(FormatSettings & format_settings) const prefix.push_back(format_settings.indent_char); prefix.push_back(format_settings.indent_char); - if (prewhere_info->prewhere_actions) { format_settings.out << prefix << "Prewhere filter" << '\n'; format_settings.out << prefix << "Prewhere filter column: " << prewhere_info->prewhere_column_name; @@ -2139,7 +2136,7 @@ void ReadFromMergeTree::describeActions(FormatSettings & format_settings) const format_settings.out << " (removed)"; format_settings.out << '\n'; - auto expression = std::make_shared(prewhere_info->prewhere_actions->clone()); + auto expression = std::make_shared(prewhere_info->prewhere_actions.clone()); expression->describeActions(format_settings.out, prefix); } @@ 
-2169,12 +2166,11 @@ void ReadFromMergeTree::describeActions(JSONBuilder::JSONMap & map) const std::unique_ptr prewhere_info_map = std::make_unique(); prewhere_info_map->add("Need filter", prewhere_info->need_filter); - if (prewhere_info->prewhere_actions) { std::unique_ptr prewhere_filter_map = std::make_unique(); prewhere_filter_map->add("Prewhere filter column", prewhere_info->prewhere_column_name); prewhere_filter_map->add("Prewhere filter remove filter column", prewhere_info->remove_prewhere_column); - auto expression = std::make_shared(prewhere_info->prewhere_actions->clone()); + auto expression = std::make_shared(prewhere_info->prewhere_actions.clone()); prewhere_filter_map->add("Prewhere filter expression", expression->toTree()); prewhere_info_map->add("Prewhere filter", std::move(prewhere_filter_map)); diff --git a/src/Processors/QueryPlan/SourceStepWithFilter.cpp b/src/Processors/QueryPlan/SourceStepWithFilter.cpp index b91debc8239..3de9ae37db0 100644 --- a/src/Processors/QueryPlan/SourceStepWithFilter.cpp +++ b/src/Processors/QueryPlan/SourceStepWithFilter.cpp @@ -34,9 +34,8 @@ Block SourceStepWithFilter::applyPrewhereActions(Block block, const PrewhereInfo block.erase(prewhere_info->row_level_column_name); } - if (prewhere_info->prewhere_actions) { - block = prewhere_info->prewhere_actions->updateHeader(block); + block = prewhere_info->prewhere_actions.updateHeader(block); auto & prewhere_column = block.getByName(prewhere_info->prewhere_column_name); if (!prewhere_column.type->canBeUsedInBooleanContext()) @@ -102,7 +101,6 @@ void SourceStepWithFilter::describeActions(FormatSettings & format_settings) con prefix.push_back(format_settings.indent_char); prefix.push_back(format_settings.indent_char); - if (prewhere_info->prewhere_actions) { format_settings.out << prefix << "Prewhere filter" << '\n'; format_settings.out << prefix << "Prewhere filter column: " << prewhere_info->prewhere_column_name; @@ -110,7 +108,7 @@ void SourceStepWithFilter::describeActions(FormatSettings & format_settings) con format_settings.out << " (removed)"; format_settings.out << '\n'; - auto expression = std::make_shared(prewhere_info->prewhere_actions->clone()); + auto expression = std::make_shared(prewhere_info->prewhere_actions.clone()); expression->describeActions(format_settings.out, prefix); } @@ -132,12 +130,11 @@ void SourceStepWithFilter::describeActions(JSONBuilder::JSONMap & map) const std::unique_ptr prewhere_info_map = std::make_unique(); prewhere_info_map->add("Need filter", prewhere_info->need_filter); - if (prewhere_info->prewhere_actions) { std::unique_ptr prewhere_filter_map = std::make_unique(); prewhere_filter_map->add("Prewhere filter column", prewhere_info->prewhere_column_name); prewhere_filter_map->add("Prewhere filter remove filter column", prewhere_info->remove_prewhere_column); - auto expression = std::make_shared(prewhere_info->prewhere_actions->clone()); + auto expression = std::make_shared(prewhere_info->prewhere_actions.clone()); prewhere_filter_map->add("Prewhere filter expression", expression->toTree()); prewhere_info_map->add("Prewhere filter", std::move(prewhere_filter_map)); diff --git a/src/Storages/IStorage.cpp b/src/Storages/IStorage.cpp index 823a6ae1cbc..755d71df531 100644 --- a/src/Storages/IStorage.cpp +++ b/src/Storages/IStorage.cpp @@ -325,9 +325,8 @@ std::string PrewhereInfo::dump() const ss << "row_level_filter " << row_level_filter->dumpDAG() << "\n"; } - if (prewhere_actions) { - ss << "prewhere_actions " << prewhere_actions->dumpDAG() << "\n"; + ss << 
"prewhere_actions " << prewhere_actions.dumpDAG() << "\n"; } ss << "remove_prewhere_column " << remove_prewhere_column diff --git a/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp b/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp index 26595fbb36d..a9b77fb6c03 100644 --- a/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp +++ b/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp @@ -329,7 +329,7 @@ void MergeTreePrefetchedReadPool::fillPerPartStatistics() part_stat.sum_marks += range.end - range.begin; const auto & columns = settings.merge_tree_determine_task_size_by_prewhere_columns && prewhere_info - ? prewhere_info->prewhere_actions->getRequiredColumnsNames() + ? prewhere_info->prewhere_actions.getRequiredColumnsNames() : column_names; part_stat.approx_size_of_mark = getApproximateSizeOfGranule(*read_info.data_part, columns); diff --git a/src/Storages/MergeTree/MergeTreeReadPoolBase.cpp b/src/Storages/MergeTree/MergeTreeReadPoolBase.cpp index 46482bc0959..6d2560bc9c7 100644 --- a/src/Storages/MergeTree/MergeTreeReadPoolBase.cpp +++ b/src/Storages/MergeTree/MergeTreeReadPoolBase.cpp @@ -65,7 +65,7 @@ static size_t calculateMinMarksPerTask( /// Which means in turn that for most of the rows we will read only the columns from prewhere clause. /// So it makes sense to use only them for the estimation. const auto & columns = settings.merge_tree_determine_task_size_by_prewhere_columns && prewhere_info - ? prewhere_info->prewhere_actions->getRequiredColumnsNames() + ? prewhere_info->prewhere_actions.getRequiredColumnsNames() : columns_to_read; const size_t part_compressed_bytes = getApproxSizeOfPart(*part.data_part, columns); diff --git a/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp b/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp index f1df9e231c4..1a0709faf1c 100644 --- a/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp +++ b/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp @@ -59,7 +59,7 @@ MergeTreeSelectProcessor::MergeTreeSelectProcessor( if (prewhere_info) LOG_TEST(log, "Original PREWHERE DAG:\n{}\nPREWHERE actions:\n{}", - (prewhere_info->prewhere_actions ? prewhere_info->prewhere_actions->dumpDAG(): std::string("")), + prewhere_info->prewhere_actions.dumpDAG(), (!prewhere_actions.steps.empty() ? prewhere_actions.dump() : std::string(""))); } @@ -96,7 +96,7 @@ PrewhereExprInfo MergeTreeSelectProcessor::getPrewhereActions(PrewhereInfoPtr pr PrewhereExprStep prewhere_step { .type = PrewhereExprStep::Filter, - .actions = std::make_shared(prewhere_info->prewhere_actions->clone(), actions_settings), + .actions = std::make_shared(prewhere_info->prewhere_actions.clone(), actions_settings), .filter_column_name = prewhere_info->prewhere_column_name, .remove_filter_column = prewhere_info->remove_prewhere_column, .need_filter = prewhere_info->need_filter, diff --git a/src/Storages/MergeTree/MergeTreeSplitPrewhereIntoReadSteps.cpp b/src/Storages/MergeTree/MergeTreeSplitPrewhereIntoReadSteps.cpp index 1d0569e0df6..36ff6c0a4bd 100644 --- a/src/Storages/MergeTree/MergeTreeSplitPrewhereIntoReadSteps.cpp +++ b/src/Storages/MergeTree/MergeTreeSplitPrewhereIntoReadSteps.cpp @@ -216,11 +216,11 @@ const ActionsDAG::Node & addAndTrue( /// 8. Add computation of the remaining outputs to the last step with the procedure similar to 4 bool tryBuildPrewhereSteps(PrewhereInfoPtr prewhere_info, const ExpressionActionsSettings & actions_settings, PrewhereExprInfo & prewhere) { - if (!prewhere_info || !prewhere_info->prewhere_actions) + if (!prewhere_info) return true; /// 1. 
List all condition nodes that are combined with AND into PREWHERE condition - const auto & condition_root = prewhere_info->prewhere_actions->findInOutputs(prewhere_info->prewhere_column_name); + const auto & condition_root = prewhere_info->prewhere_actions.findInOutputs(prewhere_info->prewhere_column_name); const bool is_conjunction = (condition_root.type == ActionsDAG::ActionType::FUNCTION && condition_root.function_base->getName() == "and"); if (!is_conjunction) return false; @@ -306,7 +306,7 @@ bool tryBuildPrewhereSteps(PrewhereInfoPtr prewhere_info, const ExpressionAction } /// 6. Find all outputs of the original DAG - auto original_outputs = prewhere_info->prewhere_actions->getOutputs(); + auto original_outputs = prewhere_info->prewhere_actions.getOutputs(); /// 7. Find all outputs that were computed in the already built DAGs, mark these nodes as outputs in the steps where they were computed /// 8. Add computation of the remaining outputs to the last step with the procedure similar to 4 NameSet all_output_names; diff --git a/src/Storages/SelectQueryInfo.h b/src/Storages/SelectQueryInfo.h index 1c4cb7d92d8..7ad6a733c6f 100644 --- a/src/Storages/SelectQueryInfo.h +++ b/src/Storages/SelectQueryInfo.h @@ -45,7 +45,7 @@ struct PrewhereInfo /// This actions are separate because prewhere condition should not be executed over filtered rows. std::optional row_level_filter; /// Actions which are executed on block in order to get filter column for prewhere step. - std::optional prewhere_actions; + ActionsDAG prewhere_actions; String row_level_column_name; String prewhere_column_name; bool remove_prewhere_column = false; @@ -53,7 +53,7 @@ struct PrewhereInfo bool generated_by_optimizer = false; PrewhereInfo() = default; - explicit PrewhereInfo(std::optional prewhere_actions_, String prewhere_column_name_) + explicit PrewhereInfo(ActionsDAG prewhere_actions_, String prewhere_column_name_) : prewhere_actions(std::move(prewhere_actions_)), prewhere_column_name(std::move(prewhere_column_name_)) {} std::string dump() const; @@ -65,8 +65,7 @@ struct PrewhereInfo if (row_level_filter) prewhere_info->row_level_filter = row_level_filter->clone(); - if (prewhere_actions) - prewhere_info->prewhere_actions = prewhere_actions->clone(); + prewhere_info->prewhere_actions = prewhere_actions.clone(); prewhere_info->row_level_column_name = row_level_column_name; prewhere_info->prewhere_column_name = prewhere_column_name; diff --git a/src/Storages/StorageBuffer.cpp b/src/Storages/StorageBuffer.cpp index aee4e4683ad..04e6d6676d1 100644 --- a/src/Storages/StorageBuffer.cpp +++ b/src/Storages/StorageBuffer.cpp @@ -319,13 +319,12 @@ void StorageBuffer::read( src_table_query_info.prewhere_info->row_level_filter->removeUnusedActions(); } - if (src_table_query_info.prewhere_info->prewhere_actions) { src_table_query_info.prewhere_info->prewhere_actions = ActionsDAG::merge( actions_dag.clone(), - std::move(*src_table_query_info.prewhere_info->prewhere_actions)); + std::move(src_table_query_info.prewhere_info->prewhere_actions)); - src_table_query_info.prewhere_info->prewhere_actions->removeUnusedActions(); + src_table_query_info.prewhere_info->prewhere_actions.removeUnusedActions(); } } @@ -440,7 +439,7 @@ void StorageBuffer::read( }); } - auto actions = std::make_shared(query_info.prewhere_info->prewhere_actions->clone(), actions_settings); + auto actions = std::make_shared(query_info.prewhere_info->prewhere_actions.clone(), actions_settings); pipe_from_buffers.addSimpleTransform([&](const Block & header) { return 
std::make_shared( From 638d4640959f93924cec00b172d1cc1837d9ac10 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 25 Jul 2024 18:42:16 +0200 Subject: [PATCH 207/661] Fix test `00673_subquery_prepared_set_performance` --- .../0_stateless/00673_subquery_prepared_set_performance.sql | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/queries/0_stateless/00673_subquery_prepared_set_performance.sql b/tests/queries/0_stateless/00673_subquery_prepared_set_performance.sql index 0591592344c..b938d54c646 100644 --- a/tests/queries/0_stateless/00673_subquery_prepared_set_performance.sql +++ b/tests/queries/0_stateless/00673_subquery_prepared_set_performance.sql @@ -1,3 +1,5 @@ +-- Tags: no-tsan + DROP TABLE IF EXISTS mergetree_00673; CREATE TABLE mergetree_00673 (x UInt64) ENGINE = MergeTree ORDER BY x; From 7a003237befaa8d58cb6a77bb47e11fd1493e277 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 25 Jul 2024 18:43:11 +0200 Subject: [PATCH 208/661] Fix test `00673_subquery_prepared_set_performance` --- .../0_stateless/00673_subquery_prepared_set_performance.sql | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/tests/queries/0_stateless/00673_subquery_prepared_set_performance.sql b/tests/queries/0_stateless/00673_subquery_prepared_set_performance.sql index b938d54c646..98c0802ffbc 100644 --- a/tests/queries/0_stateless/00673_subquery_prepared_set_performance.sql +++ b/tests/queries/0_stateless/00673_subquery_prepared_set_performance.sql @@ -1,14 +1,12 @@ --- Tags: no-tsan - DROP TABLE IF EXISTS mergetree_00673; CREATE TABLE mergetree_00673 (x UInt64) ENGINE = MergeTree ORDER BY x; INSERT INTO mergetree_00673 VALUES (1); -SELECT * FROM (SELECT * FROM (SELECT * FROM (SELECT * FROM (SELECT * FROM (SELECT * FROM (SELECT * FROM (SELECT * FROM (SELECT * FROM (SELECT * FROM (SELECT * FROM mergetree_00673 WHERE x IN (SELECT * FROM numbers(10000000)))))))))))); +SELECT * FROM (SELECT * FROM (SELECT * FROM (SELECT * FROM (SELECT * FROM (SELECT * FROM (SELECT * FROM (SELECT * FROM (SELECT * FROM (SELECT * FROM (SELECT * FROM (SELECT * FROM (SELECT * FROM (SELECT * FROM (SELECT * FROM (SELECT * FROM (SELECT * FROM (SELECT * FROM (SELECT * FROM (SELECT * FROM (SELECT * FROM mergetree_00673 WHERE x IN (SELECT * FROM numbers(1000000)))))))))))))))))))))); SET force_primary_key = 1; -SELECT * FROM (SELECT * FROM (SELECT * FROM (SELECT * FROM (SELECT * FROM (SELECT * FROM (SELECT * FROM (SELECT * FROM (SELECT * FROM (SELECT * FROM (SELECT * FROM mergetree_00673 WHERE x IN (SELECT * FROM numbers(10000000)))))))))))); +SELECT * FROM (SELECT * FROM (SELECT * FROM (SELECT * FROM (SELECT * FROM (SELECT * FROM (SELECT * FROM (SELECT * FROM (SELECT * FROM (SELECT * FROM (SELECT * FROM (SELECT * FROM (SELECT * FROM (SELECT * FROM (SELECT * FROM (SELECT * FROM (SELECT * FROM (SELECT * FROM (SELECT * FROM (SELECT * FROM (SELECT * FROM mergetree_00673 WHERE x IN (SELECT * FROM numbers(1000000)))))))))))))))))))))); DROP TABLE mergetree_00673; From 21f3a08ba7d626b967d99f694b1fde93da022ab9 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Thu, 25 Jul 2024 18:54:51 +0200 Subject: [PATCH 209/661] fix flaky test --- .../queries/0_stateless/03145_non_loaded_projection_backup.sh | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/queries/0_stateless/03145_non_loaded_projection_backup.sh b/tests/queries/0_stateless/03145_non_loaded_projection_backup.sh index 7df2118ad0c..95aef9bbc5b 100755 --- a/tests/queries/0_stateless/03145_non_loaded_projection_backup.sh +++ 
b/tests/queries/0_stateless/03145_non_loaded_projection_backup.sh @@ -6,8 +6,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) $CLICKHOUSE_CLIENT -nm -q " drop table if exists tp_1; -create table tp_1 (x Int32, y Int32, projection p (select x, y order by x)) engine = MergeTree order by y partition by intDiv(y, 100); -system stop merges tp_1; +create table tp_1 (x Int32, y Int32, projection p (select x, y order by x)) engine = MergeTree order by y partition by intDiv(y, 100) settings max_parts_to_merge_at_once=1; insert into tp_1 select number, number from numbers(3); set mutations_sync = 2; @@ -39,7 +38,6 @@ $CLICKHOUSE_CLIENT -nm -q " set send_logs_level='fatal'; drop table tp_1; restore table tp_1 from Disk('backups', '$backup_id'); -system stop merges tp_1; " | grep -o "RESTORED" $CLICKHOUSE_CLIENT -q "select count() from tp_1;" From f4b943f9f82bd4d297574774173e45abb2ee42d0 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 25 Jul 2024 19:05:41 +0200 Subject: [PATCH 210/661] Fix tidy --- src/Disks/IO/ReadBufferFromAzureBlobStorage.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Disks/IO/ReadBufferFromAzureBlobStorage.cpp b/src/Disks/IO/ReadBufferFromAzureBlobStorage.cpp index 377f6b36888..ba864035777 100644 --- a/src/Disks/IO/ReadBufferFromAzureBlobStorage.cpp +++ b/src/Disks/IO/ReadBufferFromAzureBlobStorage.cpp @@ -261,7 +261,7 @@ std::optional ReadBufferFromAzureBlobStorage::tryGetFileSize() if (!file_size) file_size = blob_client->GetProperties().Value.BlobSize; - return *file_size; + return file_size; } size_t ReadBufferFromAzureBlobStorage::readBigAt(char * to, size_t n, size_t range_begin, const std::function & /*progress_callback*/) const From a06df0729ea398642b715bfd2b121b1db0c5dd6d Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 25 Jul 2024 17:10:59 +0000 Subject: [PATCH 211/661] Remove the comment. 
--- src/Interpreters/ActionsDAG.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Interpreters/ActionsDAG.cpp b/src/Interpreters/ActionsDAG.cpp index 85b2b38da17..4aaecc491e0 100644 --- a/src/Interpreters/ActionsDAG.cpp +++ b/src/Interpreters/ActionsDAG.cpp @@ -3111,7 +3111,6 @@ ActionsDAG::NodeRawConstPtrs ActionsDAG::filterNodesByAllowedInputs( } FindOriginalNodeForOutputName::FindOriginalNodeForOutputName(const ActionsDAG & actions_) - //: actions(actions_) { const auto & actions_outputs = actions_.getOutputs(); for (const auto * output_node : actions_outputs) From 257be35365b8e0fd6163af027bbc02288ce8910b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Thu, 25 Jul 2024 19:21:31 +0200 Subject: [PATCH 212/661] Minor tweaks and extra type tests --- src/AggregateFunctions/SingleValueData.cpp | 6 +++++- ..._fix_single_value_data_assertion.reference | 12 +++++++++++ .../03210_fix_single_value_data_assertion.sql | 20 ++++++++++++++++++- 3 files changed, 36 insertions(+), 2 deletions(-) diff --git a/src/AggregateFunctions/SingleValueData.cpp b/src/AggregateFunctions/SingleValueData.cpp index 566b40253a3..11931acbbc8 100644 --- a/src/AggregateFunctions/SingleValueData.cpp +++ b/src/AggregateFunctions/SingleValueData.cpp @@ -904,8 +904,9 @@ bool SingleValueDataNumeric::isEqualTo(const DB::IColumn & column, size_t ind template bool SingleValueDataNumeric::isEqualTo(const DB::SingleValueDataBase & to) const { + /// to.has() is checked in memory.get().isEqualTo auto const & other = assert_cast(to); - return to.has() && memory.get().isEqualTo(other.memory.get()); + return memory.get().isEqualTo(other.memory.get()); } template @@ -917,6 +918,7 @@ void SingleValueDataNumeric::set(const DB::IColumn & column, size_t row_num, template void SingleValueDataNumeric::set(const DB::SingleValueDataBase & to, DB::Arena * arena) { + /// to.has() is checked in memory.get().set auto const & other = assert_cast(to); return memory.get().set(other.memory.get(), arena); } @@ -924,6 +926,7 @@ void SingleValueDataNumeric::set(const DB::SingleValueDataBase & to, DB::Aren template bool SingleValueDataNumeric::setIfSmaller(const DB::SingleValueDataBase & to, DB::Arena * arena) { + /// to.has() is checked in memory.get().setIfSmaller auto const & other = assert_cast(to); return memory.get().setIfSmaller(other.memory.get(), arena); } @@ -931,6 +934,7 @@ bool SingleValueDataNumeric::setIfSmaller(const DB::SingleValueDataBase & to, template bool SingleValueDataNumeric::setIfGreater(const DB::SingleValueDataBase & to, DB::Arena * arena) { + /// to.has() is checked in memory.get().setIfGreater auto const & other = assert_cast(to); return memory.get().setIfGreater(other.memory.get(), arena); } diff --git a/tests/queries/0_stateless/03210_fix_single_value_data_assertion.reference b/tests/queries/0_stateless/03210_fix_single_value_data_assertion.reference index e69de29bb2d..d8f7e13db55 100644 --- a/tests/queries/0_stateless/03210_fix_single_value_data_assertion.reference +++ b/tests/queries/0_stateless/03210_fix_single_value_data_assertion.reference @@ -0,0 +1,12 @@ +0 1 1 1 0 0 0 +1 3 3 3 2 2 2 +2 5 5 5 4 4 4 +3 7 7 7 6 6 6 +4 9 9 9 8 8 8 +5 11 11 11 10 10 10 +6 13 13 13 12 12 12 +7 15 15 15 14 14 14 +8 17 17 17 16 16 16 +9 19 19 19 18 18 18 + +0 107351244 107351244 107351244 107354520 107354520 107354520 diff --git a/tests/queries/0_stateless/03210_fix_single_value_data_assertion.sql b/tests/queries/0_stateless/03210_fix_single_value_data_assertion.sql index 66e62377d6b..a1243ef0b25 100644 --- 
a/tests/queries/0_stateless/03210_fix_single_value_data_assertion.sql +++ b/tests/queries/0_stateless/03210_fix_single_value_data_assertion.sql @@ -1 +1,19 @@ -SELECT intDiv(number, 2) AS k, count(toFixedString(toFixedString('hello', 5), 5)) IGNORE NULLS, sumArgMax(number, toString(number % 20)), argMax(toString(number), number) FROM (SELECT number FROM system.numbers LIMIT 65537) WHERE toLowCardinality(toLowCardinality(toNullable(21))) GROUP BY k WITH TOTALS ORDER BY k ASC NULLS FIRST LIMIT 255 SETTINGS group_by_overflow_mode = 'any', totals_mode = 'before_having', max_rows_to_group_by = 100000 FORMAT Null +SELECT + intDiv(number, 2) AS k, + sumArgMax(number, number % 20), + sumArgMax(number, leftPad(toString(number % 20), 5, '0')), -- Pad with 0 to preserve number ordering + sumArgMax(number, [number % 20, number % 20]), + sumArgMin(number, number % 20), + sumArgMin(number, leftPad(toString(number % 20), 5, '0')), + sumArgMin(number, [number % 20, number % 20]), +FROM +( + SELECT number + FROM system.numbers + LIMIT 65537 +) +GROUP BY k + WITH TOTALS +ORDER BY k ASC + LIMIT 10 +SETTINGS group_by_overflow_mode = 'any', totals_mode = 'before_having', max_rows_to_group_by = 100000; From 738d659e3bd8e222ff947e206d03d516c7053052 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 25 Jul 2024 17:26:16 +0000 Subject: [PATCH 213/661] Do not remove constants from Distributed header if query is executed up to Complete. --- src/Storages/StorageDistributed.cpp | 7 ++- .../02563_analyzer_merge.reference | 1 + .../0_stateless/02563_analyzer_merge.sql | 45 +++++++++++++++++++ 3 files changed, 51 insertions(+), 2 deletions(-) diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index 07892971ec2..9b417cda177 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -847,8 +847,11 @@ void StorageDistributed::read( /** For distributed tables we do not need constants in header, since we don't send them to remote servers. * Moreover, constants can break some functions like `hostName` that are constants only for local queries. 
*/ - for (auto & column : header) - column.column = column.column->convertToFullColumnIfConst(); + if (processed_stage != QueryProcessingStage::Complete) + { + for (auto & column : header) + column.column = column.column->convertToFullColumnIfConst(); + } modified_query_info.query = queryNodeToDistributedSelectQuery(query_tree_distributed); modified_query_info.query_tree = std::move(query_tree_distributed); diff --git a/tests/queries/0_stateless/02563_analyzer_merge.reference b/tests/queries/0_stateless/02563_analyzer_merge.reference index 8be01c88d6f..2b3cc2d5dfb 100644 --- a/tests/queries/0_stateless/02563_analyzer_merge.reference +++ b/tests/queries/0_stateless/02563_analyzer_merge.reference @@ -1,2 +1,3 @@ 0 Value_0 02563_db test_merge_table_1 1 Value_1 02563_db test_merge_table_2 +91138316-5127-45ac-9c25-4ad8779777b4 160 diff --git a/tests/queries/0_stateless/02563_analyzer_merge.sql b/tests/queries/0_stateless/02563_analyzer_merge.sql index c90f7dcb2a5..217fb7019c4 100644 --- a/tests/queries/0_stateless/02563_analyzer_merge.sql +++ b/tests/queries/0_stateless/02563_analyzer_merge.sql @@ -35,4 +35,49 @@ SELECT id, value, _database, _table FROM 02563_db.test_merge_table ORDER BY id; DROP TABLE 02563_db.test_merge_table; DROP TABLE 02563_db.test_merge_table_1; DROP TABLE 02563_db.test_merge_table_2; + +CREATE TABLE 02563_db.t_1 +( + timestamp DateTime64(9), + a String, + b String +) +ENGINE = MergeTree +PARTITION BY formatDateTime(toStartOfMinute(timestamp), '%Y%m%d%H', 'UTC') +ORDER BY (timestamp, a, b); + +CREATE TABLE 02563_db.dist_t_1 (timestamp DateTime64(9), a String, b String) ENGINE = Distributed('test_shard_localhost', '02563_db', 't_1'); + +CREATE TABLE 02563_db.m ENGINE = Merge('02563_db', '^dist_'); + +INSERT INTO 02563_db.t_1 (timestamp, a, b) +select + addMinutes(toDateTime64('2024-07-13 22:00:00', 9, 'UTC'), number), + randomString(5), + randomString(5) +from numbers(30); + +INSERT INTO 02563_db.t_1 (timestamp, a, b) +select + addMinutes(toDateTime64('2024-07-13 23:00:00', 9, 'UTC'), number), + randomString(5), + randomString(5) +from numbers(30); + +INSERT INTO 02563_db.t_1 (timestamp, a, b) +select + addMinutes(toDateTime64('2024-07-14 00:00:00', 9, 'UTC'), number), + randomString(5), + randomString(5) +from numbers(100); + + +SELECT '91138316-5127-45ac-9c25-4ad8779777b4', + count() +FROM 02563_db.m; + +DROP TABLE 02563_db.t_1; +DROP TABLE 02563_db.dist_t_1; +DROP TABLE 02563_db.m; + DROP DATABASE 02563_db; From a3d5b2d29014bb3894982cdb1cadd65448ecdf63 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Thu, 25 Jul 2024 19:39:20 +0200 Subject: [PATCH 214/661] Update ZooKeeperImpl.cpp --- src/Common/ZooKeeper/ZooKeeperImpl.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/Common/ZooKeeper/ZooKeeperImpl.cpp b/src/Common/ZooKeeper/ZooKeeperImpl.cpp index 2728f953bea..d01fc341a63 100644 --- a/src/Common/ZooKeeper/ZooKeeperImpl.cpp +++ b/src/Common/ZooKeeper/ZooKeeperImpl.cpp @@ -1014,9 +1014,6 @@ void ZooKeeper::finalize(bool error_send, bool error_receive, const String & rea LOG_INFO(log, "Finalizing session {}. finalization_started: {}, queue_finished: {}, reason: '{}'", session_id, already_started, requests_queue.isFinished(), reason); - /// Reset the original index. 
- original_index = -1; - auto expire_session_if_not_expired = [&] { /// No new requests will appear in queue after finish() From f32a0716b9bb42a09ece308a3ca64626099bfb1e Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Thu, 25 Jul 2024 19:45:06 +0200 Subject: [PATCH 215/661] Update 02842_truncate_database.sql --- tests/queries/0_stateless/02842_truncate_database.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02842_truncate_database.sql b/tests/queries/0_stateless/02842_truncate_database.sql index be92108ccb8..bcd818f55ba 100644 --- a/tests/queries/0_stateless/02842_truncate_database.sql +++ b/tests/queries/0_stateless/02842_truncate_database.sql @@ -73,7 +73,7 @@ SELECT * FROM dest_dictionary; -- {serverError UNKNOWN_TABLE} SHOW TABLES FROM test_truncate_database; SHOW DICTIONARIES FROM test_truncate_database; -CREATE TABLE new_table (x UInt16) ENGINE = ReplicatedMergeTree ORDER BY x; +CREATE TABLE new_table (x UInt16) ENGINE = MergeTree ORDER BY x; select 'new tables'; SHOW TABLES FROM test_truncate_database; From eb4ec0912ad3a1e89ea7aec424366bc268262e11 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Thu, 25 Jul 2024 20:21:37 +0200 Subject: [PATCH 216/661] Rename bad setting --- CHANGELOG.md | 2 +- src/Core/Settings.h | 2 +- src/Core/SettingsChangesHistory.cpp | 2 +- src/Formats/FormatFactory.cpp | 2 +- src/Formats/FormatSettings.h | 2 +- src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp | 4 ++-- tests/queries/0_stateless/03013_json_key_ignore_case.sh | 4 ++-- 7 files changed, 9 insertions(+), 9 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a0933bd6544..07b37835dda 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -64,7 +64,7 @@ * Print stacktrace and diagnostic info if `clickhouse-client` or `clickhouse-local` crashes. [#61109](https://github.com/ClickHouse/ClickHouse/pull/61109) ([Alexander Tokmakov](https://github.com/tavplubix)). * The result of `SHOW INDEX | INDEXES | INDICES | KEYS` was previously sorted by the primary key column names. Since this was unintuitive, the result is now sorted by the position of the primary key columns within the primary key. [#61131](https://github.com/ClickHouse/ClickHouse/pull/61131) ([Robert Schulze](https://github.com/rschu1ze)). * Change how deduplication for Materialized Views works. Fixed a lot of cases like: - on destination table: data is split for 2 or more blocks and that blocks is considered as duplicate when that block is inserted in parallel. - on MV destination table: the equal blocks are deduplicated, that happens when MV often produces equal data as a result for different input data due to performing aggregation. - on MV destination table: the equal blocks which comes from different MV are deduplicated. [#61601](https://github.com/ClickHouse/ClickHouse/pull/61601) ([Sema Checherinda](https://github.com/CheSema)). -* Allow matching column names in a case insensitive manner when reading json files (`input_format_json_ignore_key_case`). [#61750](https://github.com/ClickHouse/ClickHouse/pull/61750) ([kevinyhzou](https://github.com/KevinyhZou)). +* Allow matching column names in a case insensitive manner when reading json files (`input_format_json_case_insensitive_column_matching`). [#61750](https://github.com/ClickHouse/ClickHouse/pull/61750) ([kevinyhzou](https://github.com/KevinyhZou)). * Support reading partitioned data DeltaLake data. Infer DeltaLake schema by reading metadata instead of data. 
[#63201](https://github.com/ClickHouse/ClickHouse/pull/63201) ([Kseniia Sumarokova](https://github.com/kssenii)). * In composable protocols TLS layer accepted only `certificateFile` and `privateKeyFile` parameters. https://clickhouse.com/docs/en/operations/settings/composable-protocols. [#63985](https://github.com/ClickHouse/ClickHouse/pull/63985) ([Anton Ivashkin](https://github.com/ianton-ru)). * Added profile event `SelectQueriesWithPrimaryKeyUsage` which indicates how many SELECT queries use the primary key to evaluate the WHERE clause. [#64492](https://github.com/ClickHouse/ClickHouse/pull/64492) ([0x01f](https://github.com/0xfei)). diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 3f1ecc47f79..e10cf3fd745 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -1122,7 +1122,7 @@ class IColumn; M(Bool, input_format_json_defaults_for_missing_elements_in_named_tuple, true, "Insert default value in named tuple element if it's missing in json object", 0) \ M(Bool, input_format_json_throw_on_bad_escape_sequence, true, "Throw an exception if JSON string contains bad escape sequence in JSON input formats. If disabled, bad escape sequences will remain as is in the data", 0) \ M(Bool, input_format_json_ignore_unnecessary_fields, true, "Ignore unnecessary fields and not parse them. Enabling this may not throw exceptions on json strings of invalid format or with duplicated fields", 0) \ - M(Bool, input_format_json_ignore_key_case, false, "Ignore json key case while read json field from string", 0) \ + M(Bool, input_format_json_case_insensitive_column_matching, false, "Ignore case when matching JSON keys with CH columns", 0) \ M(Bool, input_format_try_infer_integers, true, "Try to infer integers instead of floats while schema inference in text formats", 0) \ M(Bool, input_format_try_infer_dates, true, "Try to infer dates from string fields while schema inference in text formats", 0) \ M(Bool, input_format_try_infer_datetimes, true, "Try to infer datetimes from string fields while schema inference in text formats", 0) \ diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index acd119c159b..9faf77e9087 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -64,7 +64,7 @@ static std::initializer_list Date: Thu, 25 Jul 2024 19:17:38 +0000 Subject: [PATCH 217/661] Fix: order by all with parallel replicas --- src/Analyzer/QueryTreeBuilder.cpp | 7 ++++++- ...09_parallel_replicas_order_by_all.reference | 12 ++++++++++++ .../03209_parallel_replicas_order_by_all.sql | 18 ++++++++++++++++++ 3 files changed, 36 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/03209_parallel_replicas_order_by_all.reference create mode 100644 tests/queries/0_stateless/03209_parallel_replicas_order_by_all.sql diff --git a/src/Analyzer/QueryTreeBuilder.cpp b/src/Analyzer/QueryTreeBuilder.cpp index a62b6e56ac5..ed1227b0f00 100644 --- a/src/Analyzer/QueryTreeBuilder.cpp +++ b/src/Analyzer/QueryTreeBuilder.cpp @@ -268,6 +268,8 @@ QueryTreeNodePtr QueryTreeBuilder::buildSelectExpression(const ASTPtr & select_q } } + const auto enable_order_by_all = updated_context->getSettingsRef().enable_order_by_all; + auto current_query_tree = std::make_shared(std::move(updated_context), std::move(settings_changes)); current_query_tree->setIsSubquery(is_subquery); @@ -281,7 +283,10 @@ QueryTreeNodePtr QueryTreeBuilder::buildSelectExpression(const ASTPtr & select_q 
current_query_tree->setIsGroupByWithRollup(select_query_typed.group_by_with_rollup); current_query_tree->setIsGroupByWithGroupingSets(select_query_typed.group_by_with_grouping_sets); current_query_tree->setIsGroupByAll(select_query_typed.group_by_all); - current_query_tree->setIsOrderByAll(select_query_typed.order_by_all); + /// order_by_all flag in AST is set w/o consideration of `enable_order_by_all` setting + /// since SETTINGS section has not been parsed yet, - so, check the setting here + if (enable_order_by_all) + current_query_tree->setIsOrderByAll(select_query_typed.order_by_all); current_query_tree->setOriginalAST(select_query); auto current_context = current_query_tree->getContext(); diff --git a/tests/queries/0_stateless/03209_parallel_replicas_order_by_all.reference b/tests/queries/0_stateless/03209_parallel_replicas_order_by_all.reference new file mode 100644 index 00000000000..fd453d088a6 --- /dev/null +++ b/tests/queries/0_stateless/03209_parallel_replicas_order_by_all.reference @@ -0,0 +1,12 @@ +-- { echoOn } +SELECT a, b, all FROM order_by_all ORDER BY all SETTINGS enable_order_by_all = 0, allow_experimental_parallel_reading_from_replicas=0; +B 3 10 +D 1 20 +A 2 30 +C \N 40 +SELECT a, b, all FROM order_by_all ORDER BY all SETTINGS enable_order_by_all = 0, allow_experimental_parallel_reading_from_replicas=1; +B 3 10 +D 1 20 +A 2 30 +C \N 40 +DROP TABLE order_by_all SYNC; diff --git a/tests/queries/0_stateless/03209_parallel_replicas_order_by_all.sql b/tests/queries/0_stateless/03209_parallel_replicas_order_by_all.sql new file mode 100644 index 00000000000..46a3ab4d171 --- /dev/null +++ b/tests/queries/0_stateless/03209_parallel_replicas_order_by_all.sql @@ -0,0 +1,18 @@ +DROP TABLE IF EXISTS order_by_all SYNC; +CREATE TABLE order_by_all +( + a String, + b Nullable(Int32), + all UInt64 +) +ENGINE = ReplicatedMergeTree('/clickhouse/{database}/test_03210', 'r1') ORDER BY tuple(); + +INSERT INTO order_by_all VALUES ('B', 3, 10), ('C', NULL, 40), ('D', 1, 20), ('A', 2, 30); + +SET allow_experimental_parallel_reading_from_replicas=1, max_parallel_replicas=3, cluster_for_parallel_replicas='parallel_replicas'; +SET enable_order_by_all = 0; +-- { echoOn } +SELECT a, b, all FROM order_by_all ORDER BY all SETTINGS enable_order_by_all = 0, allow_experimental_parallel_reading_from_replicas=0; +SELECT a, b, all FROM order_by_all ORDER BY all SETTINGS enable_order_by_all = 0, allow_experimental_parallel_reading_from_replicas=1; + +DROP TABLE order_by_all SYNC; From 1ba4790511e1a06af8fb85e01767ce95866ee2a8 Mon Sep 17 00:00:00 2001 From: Blargian Date: Thu, 25 Jul 2024 21:18:48 +0200 Subject: [PATCH 218/661] Review changes --- .../functions/type-conversion-functions.md | 550 ++++++++++-------- 1 file changed, 310 insertions(+), 240 deletions(-) diff --git a/docs/en/sql-reference/functions/type-conversion-functions.md b/docs/en/sql-reference/functions/type-conversion-functions.md index 057083d317f..844d957d538 100644 --- a/docs/en/sql-reference/functions/type-conversion-functions.md +++ b/docs/en/sql-reference/functions/type-conversion-functions.md @@ -51,7 +51,7 @@ SETTINGS cast_keep_nullable = 1 ## toInt8 -Converts an input value to a value of type `Int8`. +Converts an input value to a value of type [`Int8`](../data-types/int-uint.md). Throws an exception in case of an error. **Syntax** @@ -61,10 +61,20 @@ toInt8(expr) **Arguments** -- `expr` — Expression returning a number or a string with the decimal representation of a number. [Expression](../syntax.md/#syntax-expressions). 
+- `expr` — Expression returning a number or a string representation of a number. [Expression](../syntax.md/#syntax-expressions). + +Supported types: +- (U)Int8/16/32/64/128/256 +- Float* +- String representations of (U)Int8/16/32/128/256 + +Unsupported types: +- Float values `NaN` and `Inf` throw an exception. +- String representations of binary and hexadecimal values, e.g. `SELECT toInt8('0xc0fe');` :::note -Binary, octal, and hexadecimal representations of numbers are not supported. Leading zeroes are stripped. +If the input value cannot be represented within the bounds of [Int8](../data-types/int-uint.md), the result over or under flows. This is not considered an error. +For example: `SELECT toInt8(128) == -128;`, `SELECT toInt8(128.0) == -128;`, `SELECT toInt8('128') == -128;`. ::: **Returned value** @@ -72,11 +82,7 @@ Binary, octal, and hexadecimal representations of numbers are not supported. Lea - 8-bit integer value. [Int8](../data-types/int-uint.md). :::note -Function uses [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning it truncates fractional digits of numbers. -::: - -:::danger -An exception is thrown for [NaN and Inf](../data-types/float.md/#data_type-float-nan-inf) arguments. Keep in mind [numeric conversions issues](#common-issues-with-data-conversion), when using this function. +The function uses [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning it truncates fractional digits of numbers. ::: **Example** @@ -106,32 +112,33 @@ Result: ## toInt8OrZero -Like [`toInt8`](#toint8), it takes an argument of type [String](../data-types/string.md) and tries to parse it to type `Int8`. If unsuccessful, returns `0`. +Like [`toInt8`](#toint8), this function converts an input value to a value of type [Int8](../data-types/int-uint.md) but returns `0` in case of an error. **Syntax** ```sql -toInt8OrZero(expr) +toInt8OrZero(x) ``` **Arguments** -- `expr` — Expression returning a number or a string with the decimal representation of a number. [Expression](../syntax.md/#syntax-expressions) / [String](../data-types/string.md). +- `x` — A String representation of a number. [String](../data-types/string.md). -:::note -Binary, octal, and hexadecimal representations of numbers are not supported. Leading zeroes are stripped. -::: +Supported types: +- String representations of (U)Int8/16/32/128/256 + +Types for which `0` is returned: +- String representations of ordinary Float32/64 values. +- String representations of Float values `NaN` and `Inf`. +- String representations of binary and hexadecimal values, e.g. `SELECT toInt8OrZero('0xc0fe');`. +- If the input value cannot be represented within the bounds of [toInt16](../data-types/int-uint.md), and the result over or under flows. **Returned value** - 8-bit integer value if successful, otherwise `0`. [Int8](../data-types/int-uint.md). :::note -Function uses [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning it truncates fractional digits of numbers. -::: - -:::danger -An exception is thrown for [NaN and Inf](../data-types/float.md/#data_type-float-nan-inf) arguments. Keep in mind [numeric conversions issues](#common-issues-with-data-conversion), when using this function. +The function uses [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning it truncates fractional digits of numbers. 
::: **Example** @@ -160,32 +167,33 @@ Result: ## toInt8OrNull -Like [`toInt8`](#toint8), takes an argument of type [String](../data-types/string.md) and tries to parse it to type `Int8`. If unsuccessful, returns `NULL`. +Like [`toInt8`](#toint8), takes an argument of type [String](../data-types/string.md) and tries to parse it to type [`Int8`](../data-types/int-uint.md). If unsuccessful, returns [`NULL`](../data-types/nullable.md). **Syntax** ```sql -toInt8OrNull(expr) +toInt8OrNull(x) ``` **Arguments** -- `expr` — Expression returning a number or a string with the decimal representation of a number. [Expression](../syntax.md/#syntax-expressions) / [String](../data-types/string.md). +- `x` — A String representation of a number. [String](../data-types/string.md). -:::note -Binary, octal, and hexadecimal representations of numbers are not supported. Leading zeroes are stripped. -::: +Supported types: +- String representations of (U)Int8/16/32/128/256 + +Types for which `\N` is returned: +- String representations of ordinary Float32/64 values. +- String representations of Float values `NaN` and `Inf`. +- String representations of binary and hexadecimal values, e.g. `SELECT toInt8OrNull('0xc0fe');`. +- If the input value cannot be represented within the bounds of [Int16](../data-types/int-uint.md), and the result over or under flows. **Returned value** - 8-bit integer value if successful, otherwise `NULL`. [Int8](../data-types/int-uint.md) / [NULL](../data-types/nullable.md). :::note -Function uses [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning it truncates fractional digits of numbers. -::: - -:::danger -An exception is thrown for [NaN and Inf](../data-types/float.md/#data_type-float-nan-inf) arguments. Keep in mind [numeric conversions issues](#common-issues-with-data-conversion), when using this function. +The function uses [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning it truncates fractional digits of numbers. ::: **Example** @@ -212,7 +220,7 @@ Result: ## toInt8OrDefault -Like [`toInt8`](#toint8), takes an argument of type [String](../data-types/string.md) and tries to parse it to type `Int8`. If unsuccessful, returns the default type value. +Like [`toInt8`](#toint8), takes an argument of type [String](../data-types/string.md) and tries to parse it to type [`Int8`](../data-types/int-uint.md). If unsuccessful, returns the default type value. **Syntax** @@ -222,26 +230,28 @@ toInt8OrDefault(expr, def) **Arguments** -- `expr` — Expression returning a number or a string with the decimal representation of a number. [Expression](../syntax.md/#syntax-expressions) / [String](../data-types/string.md). +- `expr` — Expression returning a number or a string representation of a number. [Expression](../syntax.md/#syntax-expressions) / [String](../data-types/string.md). - `def` — The default value to return if parsing to type `Int8` is unsuccessful. [Int8](../data-types/int-uint.md). -:::note -Binary, octal, and hexadecimal representations of numbers are not supported. Leading zeroes are stripped. -::: +Supported types: +- (U)Int8/16/32/64/128/256 +- Float* +- String representations of (U)Int8/16/32/128/256 + +Types for which the default value is returned: +- Float values `NaN` and `Inf` return the default value. +- String representations of binary and hexadecimal values, e.g. 
`SELECT toInt8OrDefault('0xc0fe', CAST('-1', 'Int8'));` +- If the input value cannot be represented within the bounds of [Int8](../data-types/int-uint.md) and the result over or under flows. **Returned value** - 8-bit integer value if successful, otherwise returns the default value. [Int8](../data-types/int-uint.md). :::note -- Function uses [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning it truncates fractional digits of numbers. +- The function uses [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning it truncates fractional digits of numbers. - The default value type should be the same as the cast type. ::: -:::danger -An exception is thrown for [NaN and Inf](../data-types/float.md/#data_type-float-nan-inf) arguments. Keep in mind [numeric conversions issues](#common-issues-with-data-conversion), when using this function. -::: - **Example** Query: @@ -268,7 +278,7 @@ Result: ## toInt16 -Converts an input value to a value of type `Int16`. +Converts an input value to a value of type [`Int16`](../data-types/int-uint.md). Throws an exception in case of an error. **Syntax** @@ -278,10 +288,20 @@ toInt16(expr) **Arguments** -- `expr` — Expression returning a number or a string with the decimal representation of a number. [Expression](../syntax.md/#syntax-expressions). +- `expr` — Expression returning a number or a string representation of a number. [Expression](../syntax.md/#syntax-expressions). + +Supported types: +- (U)Int8/16/32/64/128/256 +- Float* +- String representations of (U)Int8/16/32/128/256 + +Unsupported types: +- Float values `NaN` and `Inf` throw an exception. +- String representations of binary and hexadecimal values, e.g. `SELECT toInt16('0xc0fe');` :::note -Binary, octal, and hexadecimal representations of numbers are not supported. Leading zeroes are stripped. +If the input value cannot be represented within the bounds of [toInt16](../data-types/int-uint.md), the result over or under flows. This is not considered an error. +For example: `SELECT toInt16(32768) == -32768;`, `SELECT toInt16(32768) == -32768;`, `SELECT toInt16('32768') == -32768;`. ::: **Returned value** @@ -289,11 +309,7 @@ Binary, octal, and hexadecimal representations of numbers are not supported. Lea - 16-bit integer value. [Int16](../data-types/int-uint.md). :::note -Function uses [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning it truncates fractional digits of numbers. -::: - -:::danger -An exception is thrown for [NaN and Inf](../data-types/float.md/#data_type-float-nan-inf) arguments. Keep in mind [numeric conversions issues](#common-issues-with-data-conversion), when using this function. +The function uses [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning it truncates fractional digits of numbers. ::: **Example** @@ -323,32 +339,33 @@ Result: ## toInt16OrZero -Like [`toInt16`](#toint16), takes an argument of type [String](../data-types/string.md) and tries to parse it to type `Int16`. If unsuccessful, returns `0`. +Like [`toInt16`](#toint16), this function converts an input value to a value of type [Int16](../data-types/int-uint.md) but returns `0` in case of an error. **Syntax** ```sql -toInt16OrZero(expr) +toInt16OrZero(x) ``` **Arguments** -- `expr` — Expression returning a number or a string with the decimal representation of a number. [Expression](../syntax.md/#syntax-expressions) / [String](../data-types/string.md). 
+- `x` — A String representation of a number. [String](../data-types/string.md). -:::note -Binary, octal, and hexadecimal representations of numbers are not supported. Leading zeroes are stripped. -::: +Supported types: +- String representations of (U)Int8/16/32/128/256 + +Types for which `0` is returned: +- String representations of ordinary Float32/64 values. +- String representations of Float values `NaN` and `Inf`. +- String representations of binary and hexadecimal values, e.g. `SELECT toInt16OrZero('0xc0fe');`. +- If the input value cannot be represented within the bounds of [Int16](../data-types/int-uint.md) and the result over or under flows. **Returned value** - 16-bit integer value if successful, otherwise `0`. [Int16](../data-types/int-uint.md). :::note -Function uses [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning it truncates fractional digits of numbers. -::: - -:::danger -An exception is thrown for [NaN and Inf](../data-types/float.md/#data_type-float-nan-inf) arguments. Keep in mind [numeric conversions issues](#common-issues-with-data-conversion), when using this function. +The function uses [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning it truncates fractional digits of numbers. ::: **Example** @@ -377,32 +394,33 @@ Result: ## toInt16OrNull -Like [`toInt16`](#toint16), takes an argument of type [String](../data-types/string.md) and tries to parse it to type `Int16`. If unsuccessful, returns `NULL`. +Like [`toInt16`](#toint16), takes an argument of type [String](../data-types/string.md) and tries to parse it to type [`Int16`](../data-types/int-uint.md). If unsuccessful, returns [`NULL`](../data-types/nullable.md). **Syntax** ```sql -toInt16OrNull(expr) +toInt16OrNull(x) ``` **Arguments** -- `expr` — Expression returning a number or a string with the decimal representation of a number. [Expression](../syntax.md/#syntax-expressions) / [String](../data-types/string.md). +- `x` — A String representation of a number. [String](../data-types/string.md). -:::note -Binary, octal, and hexadecimal representations of numbers are not supported. Leading zeroes are stripped. -::: +Supported types: +- String representations of (U)Int8/16/32/128/256 + +Types for which `\N` is returned: +- String representations of ordinary Float32/64 values. +- String representations of Float values `NaN` and `Inf`. +- String representations of binary and hexadecimal values, e.g. `SELECT toInt16OrNull('0xc0fe');`. +- If the input value cannot be represented within the bounds of [Int16](../data-types/int-uint.md) and the result over or under flows. **Returned value** - 16-bit integer value if successful, otherwise `NULL`. [Int16](../data-types/int-uint.md) / [NULL](../data-types/nullable.md). :::note -Function uses [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning it truncates fractional digits of numbers. -::: - -:::danger -An exception is thrown for [NaN and Inf](../data-types/float.md/#data_type-float-nan-inf) arguments. Keep in mind [numeric conversions issues](#common-issues-with-data-conversion), when using this function. +The function uses [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning it truncates fractional digits of numbers. ::: **Example** @@ -431,7 +449,7 @@ Result: ## toInt16OrDefault -Like [`toInt16`](#toint16), takes an argument of type [String](../data-types/string.md) and tries to parse it to type `Int16`. 
If unsuccessful, returns the default type value. +Like [`toInt16`](#toint16), takes an argument of type [String](../data-types/string.md) and tries to parse it to type [`Int16`](../data-types/int-uint.md). If unsuccessful, returns the default type value. **Syntax** @@ -441,26 +459,28 @@ toInt16OrDefault(expr, def) **Arguments** -- `expr` — Expression returning a number or a string with the decimal representation of a number. [Expression](../syntax.md/#syntax-expressions) / [String](../data-types/string.md). +- `expr` — Expression returning a number or a string representation of a number. [Expression](../syntax.md/#syntax-expressions) / [String](../data-types/string.md). - `def` — The default value to return if parsing to type `Int16` is unsuccessful. [Int8](../data-types/int-uint.md). -:::note -Binary, octal, and hexadecimal representations of numbers are not supported. Leading zeroes are stripped. -::: +Supported types: +- (U)Int8/16/32/64/128/256 +- Float* +- String representations of (U)Int8/16/32/128/256 + +Types for which the default value is returned: +- Float values `NaN` and `Inf` return the default value. +- String representations of binary and hexadecimal values, e.g. `SELECT toInt16OrDefault('0xc0fe', CAST('-1', 'Int16'));` +- If the input value cannot be represented within the bounds of [Int16](../data-types/int-uint.md) and the result over or under flows. **Returned value** - 16-bit integer value if successful, otherwise returns the default value. [Int16](../data-types/int-uint.md). :::note -- Function uses [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning it truncates fractional digits of numbers. +- The function uses [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning it truncates fractional digits of numbers. - The default value type should be the same as the cast type. ::: -:::danger -An exception is thrown for [NaN and Inf](../data-types/float.md/#data_type-float-nan-inf) arguments. Keep in mind [numeric conversions issues](#common-issues-with-data-conversion), when using this function. -::: - **Example** Query: @@ -485,7 +505,7 @@ Result: ## toInt32 -Converts an input value to a value of type `Int32`. +Converts an input value to a value of type [`Int32`](../data-types/int-uint.md). Throws an exception in case of an error. **Syntax** @@ -495,10 +515,25 @@ toInt32(expr) **Arguments** -- `expr` — Expression returning a number or a string with the decimal representation of a number. [Expression](../syntax.md/#syntax-expressions). +- `expr` — Expression returning a number or a string representation of a number. [Expression](../syntax.md/#syntax-expressions). + +Supported types: +- (U)Int8/16/32/64/128/256 +- Float* +- String representations of (U)Int8/16/32/128/256 + +Unsupported types: +- Float values `NaN` and `Inf` throw an exception. +- String representations of binary and hexadecimal values, e.g. `SELECT toInt32('0xc0fe');` :::note -Binary, octal, and hexadecimal representations of numbers are not supported. Leading zeroes are stripped. +If the input value cannot be represented within the bounds of [toInt16](../data-types/int-uint.md), the result over or under flows. This is not considered an error. +For example: +``` +SELECT toInt32(2147483648) == -2147483648; +SELECT toInt32(2147483648.0) == -2147483648; +SELECT toInt32('2147483648') == -2147483648; +``` ::: **Returned value** @@ -506,11 +541,7 @@ Binary, octal, and hexadecimal representations of numbers are not supported. 
Lea - 32-bit integer value. [Int32](../data-types/int-uint.md). :::note -Function uses [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning it truncates fractional digits of numbers. -::: - -:::danger -An exception is thrown for [NaN and Inf](../data-types/float.md/#data_type-float-nan-inf) arguments. Keep in mind [numeric conversions issues](#common-issues-with-data-conversion), when using this function. +The function uses [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning it truncates fractional digits of numbers. ::: **Example** @@ -540,32 +571,34 @@ Result: ## toInt32OrZero -Like [`toInt32`](#toint32), takes an argument of type [String](../data-types/string.md) and tries to parse it to type `Int32`. If unsuccessful, returns `0`. +Like [`toInt32`](#toint32), this function converts an input value to a value of type [Int8](../data-types/int-uint.md) but returns `0` in case of an error. **Syntax** ```sql -toInt32OrZero(expr) +toInt32OrZero(x) ``` **Arguments** -- `expr` — Expression returning a number or a string with the decimal representation of a number. [Expression](../syntax.md/#syntax-expressions) / [String](../data-types/string.md). +- `x` — A String representation of a number. [String](../data-types/string.md). + +Supported types: +- String representations of (U)Int8/16/32/128/256 + +Types for which `0` is returned: +- String representations of ordinary Float32/64 values. +- String representations of Float values `NaN` and `Inf`. +- String representations of binary and hexadecimal values, e.g. `SELECT toInt32OrZero('0xc0fe');`. +- If the input value cannot be represented within the bounds of [Int32](../data-types/int-uint.md) and the result over or under flows. -:::note -Binary, octal, and hexadecimal representations of numbers are not supported. Leading zeroes are stripped. -::: **Returned value** - 32-bit integer value if successful, otherwise `0`. [Int32](../data-types/int-uint.md) :::note -Function uses [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning it truncate fractional digits of numbers. -::: - -:::danger -An exception is thrown for [NaN and Inf](../data-types/float.md/#data_type-float-nan-inf) arguments. Keep in mind [numeric conversions issues](#common-issues-with-data-conversion), when using this function. +The function uses [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning it truncate fractional digits of numbers. ::: **Example** @@ -588,35 +621,36 @@ Result: - [`toInt32`](#toint32). - [`toInt32OrNull`](#toint32ornull). - [`toInt32OrDefault`](#toint32ordefault). -- + ## toInt32OrNull -Like [`toInt32`](#toint32), takes an argument of type [String](../data-types/string.md) and tries to parse it to type `Int32`. If unsuccessful, returns `NULL`. +Like [`toInt32`](#toint32), takes an argument of type [String](../data-types/string.md) and tries to parse it to type [`Int32`](../data-types/int-uint.md). If unsuccessful, returns [`NULL`](../data-types/nullable.md). **Syntax** ```sql -toInt32OrNull(expr) +toInt32OrNull(x) ``` **Arguments** -- `expr` — Expression returning a number or a string with the decimal representation of a number. [Expression](../syntax.md/#syntax-expressions) / [String](../data-types/string.md). +- `x` — A String representation of a number. [String](../data-types/string.md). -:::note -Binary, octal, and hexadecimal representations of numbers are not supported. 
Leading zeroes are stripped. -::: +Supported types: +- String representations of (U)Int8/16/32/128/256 + +Types for which `\N` is returned: +- String representations of ordinary Float32/64 values. +- String representations of Float values `NaN` and `Inf`. +- String representations of binary and hexadecimal values, e.g. `SELECT toInt32OrNull('0xc0fe');`. +- If the input value cannot be represented within the bounds of [Int32](../data-types/int-uint.md) and the result over or under flows. **Returned value** - 32-bit integer value if successful, otherwise `NULL`. [Int32](../data-types/int-uint.md) / [NULL](../data-types/nullable.md). :::note -Function uses [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning it truncates fractional digits of numbers. -::: - -:::danger -An exception is thrown for [NaN and Inf](../data-types/float.md/#data_type-float-nan-inf) arguments. Keep in mind [numeric conversions issues](#common-issues-with-data-conversion), when using this function. +The function uses [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning it truncates fractional digits of numbers. ::: **Example** @@ -643,7 +677,7 @@ Result: ## toInt32OrDefault -Like [`toInt32`](#toint32), takes an argument of type [String](../data-types/string.md) and tries to parse it to type `Int32`. If unsuccessful, returns the default type value. +Like [`toInt32`](#toint32), takes an argument of type [String](../data-types/string.md) and tries to parse it to type [`Int32`](../data-types/int-uint.md). If unsuccessful, returns the default type value. **Syntax** @@ -653,24 +687,26 @@ toInt32OrDefault(expr, def) **Arguments** -- `expr` — Expression returning a number or a string with the decimal representation of a number. [Expression](../syntax.md/#syntax-expressions) / [String](../data-types/string.md). +- `expr` — Expression returning a number or a string representation of a number. [Expression](../syntax.md/#syntax-expressions) / [String](../data-types/string.md). - `def` — The default value to return if parsing to type `Int32` is unsuccessful. [Int32](../data-types/int-uint.md). -:::note -Binary, octal, and hexadecimal representations of numbers are not supported. Leading zeroes are stripped. -::: +Supported types: +- (U)Int8/16/32/64/128/256 +- Float* +- String representations of (U)Int8/16/32/128/256 + +Types for which the default value is returned: +- Float values `NaN` and `Inf` return the default value. +- String representations of binary and hexadecimal values, e.g. `SELECT toInt32OrDefault('0xc0fe', CAST('-1', 'Int32'));` +- If the input value cannot be represented within the bounds of [Int32](../data-types/int-uint.md) and the result over or under flows. **Returned value** - 32-bit integer value if successful, otherwise returns the default value. [Int32](../data-types/int-uint.md). :::note -- Function uses [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning it truncates fractional digits of numbers. +- The function uses [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning it truncates fractional digits of numbers. - The default value type should be the same as the cast type. - ::: - -:::danger -An exception is thrown for [NaN and Inf](../data-types/float.md/#data_type-float-nan-inf) arguments. Keep in mind [numeric conversions issues](#common-issues-with-data-conversion), when using this function. 
:::

**Example**

Query:

``` sql
SELECT
    toInt32OrDefault('-8', CAST('-1', 'Int32')) AS x,
    toTypeName(x)
@@ -697,7 +733,7 @@ Result:

## toInt64

-Converts an input value to a value of type `Int64`.
+Converts an input value to a value of type [`Int64`](../data-types/int-uint.md). Throws an exception in case of an error.

**Syntax**

@@ -707,10 +743,26 @@ toInt64(expr)

**Arguments**

-- `expr` — Expression returning a number or a string with the decimal representation of a number. [Expression](../syntax.md/#syntax-expressions).
+- `expr` — Expression returning a number or a string representation of a number. [Expression](../syntax.md/#syntax-expressions).
+
+Supported types:
+- (U)Int8/16/32/64/128/256
+- Float*
+- String representations of (U)Int8/16/32/128/256
+
+Unsupported types:
+- Float values `NaN` and `Inf` throw an exception.
+- String representations of binary and hexadecimal values, e.g. `SELECT toInt64('0xc0fe');`

 :::note
-Binary, octal, and hexadecimal representations of numbers are not supported. Leading zeroes are stripped.
+If the input value cannot be represented within the bounds of [Int16](../data-types/int-uint.md), the result over or under flows. This is not considered an error.
+For example:
+
+```
+SELECT toInt64(9223372036854775808) == -9223372036854775808;
+SELECT toInt64(9223372036854775808.0) == -9223372036854775808;
+SELECT toInt64('9223372036854775808') == -9223372036854775808;
+```
 :::

**Returned value**

- 64-bit integer value. [Int64](../data-types/int-uint.md). [Int64](../data-types/int-uint.md).

 :::note
-Function uses [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning it truncates fractional digits of numbers.
-:::
-
-:::danger
-An exception is thrown for [NaN and Inf](../data-types/float.md/#data_type-float-nan-inf) arguments. Keep in mind [numeric conversions issues](#common-issues-with-data-conversion), when using this function.
+The function uses [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning it truncates fractional digits of numbers.
 :::

**Example**

@@ -752,32 +800,33 @@ Result:

## toInt64OrZero

-Like [`toInt64`](#toint64), takes an argument of type [String](../data-types/string.md) and tries to parse it to type `Int64`. If unsuccessful, returns `0`.
+Like [`toInt64`](#toint64), this function converts an input value to a value of type [Int64](../data-types/int-uint.md) but returns `0` in case of an error.

**Syntax**

```sql
-toInt64OrZero(expr)
+toInt64OrZero(x)
```

**Arguments**

-- `expr` — Expression returning a number or a string with the decimal representation of a number. [Expression](../syntax.md/#syntax-expressions) / [String](../data-types/string.md).
+- `x` — A String representation of a number. [String](../data-types/string.md).

-:::note
-Binary, octal, and hexadecimal representations of numbers are not supported. Leading zeroes are stripped.
-:::
+Supported types:
+- String representations of (U)Int8/16/32/128/256
+
+Types for which `0` is returned:
+- String representations of ordinary Float32/64 values.
+- String representations of Float values `NaN` and `Inf`.
+- String representations of binary and hexadecimal values, e.g. `SELECT toInt64OrZero('0xc0fe');`.
+- If the input value cannot be represented within the bounds of [Int64](../data-types/int-uint.md) and the result over or under flows.

**Returned value**

- 64-bit integer value if successful, otherwise `0`. [Int64](../data-types/int-uint.md).

:::note -Function uses [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning it truncates fractional digits of numbers. -::: - -:::danger -An exception is thrown for [NaN and Inf](../data-types/float.md/#data_type-float-nan-inf) arguments. Keep in mind [numeric conversions issues](#common-issues-with-data-conversion), when using this function. +The function uses [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning it truncates fractional digits of numbers. ::: **Example** @@ -806,32 +855,33 @@ Result: ## toInt64OrNull -Like [`toInt64`], takes an argument of type [String](../data-types/string.md) and tries to parse it to type `Int64`. If unsuccessful, returns `NULL`. +Like [`toInt64`], takes an argument of type [String](../data-types/string.md) and tries to parse it to type [`Int64`](../data-types/nullable.md). If unsuccessful, returns [`NULL`](../data-types/nullable.md). **Syntax** ```sql -toInt64OrNull(expr) +toInt64OrNull(x) ``` **Arguments** -- `expr` — Expression returning a number or a string with the decimal representation of a number. [Expression](../syntax.md/#syntax-expressions) / [String](../data-types/string.md). +- `x` — A String representation of a number. [Expression](../syntax.md/#syntax-expressions) / [String](../data-types/string.md). -:::note -Binary, octal, and hexadecimal representations of numbers are not supported. Leading zeroes are stripped. -::: +Supported types: +- String representations of (U)Int8/16/32/128/256 + +Types for which `\N` is returned: +- String representations of ordinary Float32/64 values. +- String representations of Float values `NaN` and `Inf`. +- String representations of binary and hexadecimal values, e.g. `SELECT toInt64OrNull('0xc0fe');`. +- If the input value cannot be represented within the bounds of [Int64](../data-types/int-uint.md) and the result over or under flows. **Returned value** - Integer value of type `Int64` if successful, otherwise `NULL`. [Int64](../data-types/int-uint.md) / [NULL](../data-types/nullable.md). :::note -Function uses [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning it truncates fractional digits of numbers. -::: - -:::danger -An exception is thrown for [NaN and Inf](../data-types/float.md/#data_type-float-nan-inf) arguments. Keep in mind [numeric conversions issues](#common-issues-with-data-conversion), when using this function. +The function uses [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning it truncates fractional digits of numbers. ::: **Example** @@ -860,7 +910,7 @@ Result: ## toInt64OrDefault -Like [`toInt64`](#toint64), takes an argument of type [String](../data-types/string.md) and tries to parse it to type `Int64`. If unsuccessful, returns the default type value. +Like [`toInt64`](#toint64), takes an argument of type [String](../data-types/string.md) and tries to parse it to type [`Int64`](../data-types/nullable.md). If unsuccessful, returns the default type value. **Syntax** @@ -870,24 +920,26 @@ toInt64OrDefault(expr, def) **Arguments** -- `expr` — Expression returning a number or a string with the decimal representation of a number. [Expression](../syntax.md/#syntax-expressions) / [String](../data-types/string.md). +- `expr` — Expression returning a number or a string representation of a number. [Expression](../syntax.md/#syntax-expressions) / [String](../data-types/string.md). 
- `def` — The default value to return if parsing to type `Int64` is unsuccessful. [Int64](../data-types/int-uint.md). -:::note -Binary, octal, and hexadecimal representations of numbers are not supported. Leading zeroes are stripped. -::: +Supported types: +- (U)Int8/16/32/64/128/256 +- Float* +- String representations of (U)Int8/16/32/128/256 + +Types for which the default value is returned: +- Float values `NaN` and `Inf` return the default value. +- String representations of binary and hexadecimal values, e.g. `SELECT toInt64OrDefault('0xc0fe', CAST('-1', 'Int64'));` +- If the input value cannot be represented within the bounds of [Int64](../data-types/int-uint.md) and the result over or under flows. **Returned value** - Integer value of type `Int64` if successful, otherwise returns the default value. [Int64](../data-types/int-uint.md). :::note -- Function uses [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning it truncates fractional digits of numbers. +- The function uses [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning it truncates fractional digits of numbers. - The default value type should be the same as the cast type. - ::: - -:::danger -An exception is thrown for [NaN and Inf](../data-types/float.md/#data_type-float-nan-inf) arguments. Keep in mind [numeric conversions issues](#common-issues-with-data-conversion), when using this function. ::: **Example** @@ -916,7 +968,7 @@ Result: ## toInt128 -Converts an input value to a value of type `Int128`. +Converts an input value to a value of type [`Int128`](../data-types/int-uint.md). Throws an exception in case of an error. **Syntax** @@ -926,10 +978,19 @@ toInt128(expr) **Arguments** -- `expr` — Expression returning a number or a string with the decimal representation of a number. [Expression](../syntax.md/#syntax-expressions). +- `expr` — Expression returning a number or a string representation of a number. [Expression](../syntax.md/#syntax-expressions). + +Supported types: +- (U)Int8/16/32/64/128/256 +- Float* +- String representations of (U)Int8/16/32/128/256 + +Unsupported types: +- Float values `NaN` and `Inf` throw an exception. +- String representations of binary and hexadecimal values, e.g. `SELECT toInt128('0xc0fe');` :::note -Binary, octal, and hexadecimal representations of numbers are not supported. Leading zeroes are stripped. +If the input value cannot be represented within the bounds of [Int128](../data-types/int-uint.md), the result over or under flows. This is not considered an error. ::: **Returned value** @@ -937,11 +998,7 @@ Binary, octal, and hexadecimal representations of numbers are not supported. Lea - 128-bit integer value. [Int128](../data-types/int-uint.md). :::note -Function uses [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning it truncates fractional digits of numbers. -::: - -:::danger -An exception is thrown for [NaN and Inf](../data-types/float.md/#data_type-float-nan-inf) arguments. Keep in mind [numeric conversions issues](#common-issues-with-data-conversion), when using this function. +The function uses [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning it truncates fractional digits of numbers. ::: **Example** @@ -971,7 +1028,7 @@ Result: ## toInt128OrZero -Like [`toInt128`](#toint128), takes an argument of type [String](../data-types/string.md) and tries to parse it to type `Int128`. If unsuccessful, returns `0`. 
+Like [`toInt128`](#toint128), this function converts an input value to a value of type [Int128](../data-types/int-uint.md) but returns `0` in case of an error.

**Syntax**

@@ -981,22 +1038,23 @@ toInt128OrZero(expr)

**Arguments**

-- `expr` — Expression returning a number or a string with the decimal representation of a number. [Expression](../syntax.md/#syntax-expressions) / [String](../data-types/string.md).
+- `expr` — Expression returning a number or a string representation of a number. [Expression](../syntax.md/#syntax-expressions) / [String](../data-types/string.md).

-:::note
-Binary, octal, and hexadecimal representations of numbers are not supported. Leading zeroes are stripped.
-:::
+Supported types:
+- String representations of (U)Int8/16/32/128/256
+
+Types for which `0` is returned:
+- String representations of ordinary Float32/64 values.
+- String representations of Float values `NaN` and `Inf`.
+- String representations of binary and hexadecimal values, e.g. `SELECT toInt128OrZero('0xc0fe');`.
+- If the input value cannot be represented within the bounds of [Int128](../data-types/int-uint.md) and the result over or under flows.

**Returned value**

- 128-bit integer value if successful, otherwise `0`. [Int128](../data-types/int-uint.md).

 :::note
-Function uses [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning it truncates fractional digits of numbers.
-::: - -:::danger -An exception is thrown for [NaN and Inf](../data-types/float.md/#data_type-float-nan-inf) arguments. Keep in mind [numeric conversions issues](#common-issues-with-data-conversion), when using this function. +The function uses [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning it truncates fractional digits of numbers. ::: **Example** @@ -1079,7 +1138,7 @@ Result: ## toInt128OrDefault -Like [`toInt128`](#toint128), takes an argument of type [String](../data-types/string.md) and tries to parse it to type `Int128`. If unsuccessful, returns the default type value. +Like [`toInt128`](#toint128), takes an argument of type [String](../data-types/string.md) and tries to parse it to type [`Int128`](../data-types/int-uint.md). If unsuccessful, returns the default type value. **Syntax** @@ -1089,26 +1148,28 @@ toInt128OrDefault(expr, def) **Arguments** -- `expr` — Expression returning a number or a string with the decimal representation of a number. [Expression](../syntax.md/#syntax-expressions) / [String](../data-types/string.md). +- `expr` — Expression returning a number or a string representation of a number. [Expression](../syntax.md/#syntax-expressions) / [String](../data-types/string.md). - `def` — The default value to return if parsing to type `Int128` is unsuccessful. [Int128](../data-types/int-uint.md). -:::note -Binary, octal, and hexadecimal representations of numbers are not supported. Leading zeroes are stripped. -::: +Supported types: +- (U)Int8/16/32/64/128/256 +- Float* +- String representations of (U)Int8/16/32/128/256 + +Types for which the default value is returned: +- Float values `NaN` and `Inf` return the default value. +- String representations of binary and hexadecimal values, e.g. `SELECT toInt128OrDefault('0xc0fe', CAST('-1', 'Int128'));` +- If the input value cannot be represented within the bounds of [Int128](../data-types/int-uint.md) and the result over or under flows. **Returned value** - 128-bit integer value if successful, otherwise returns the default value. [Int128](../data-types/int-uint.md). :::note -- Function uses [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning it truncates fractional digits of numbers. +- The function uses [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning it truncates fractional digits of numbers. - The default value type should be the same as the cast type. ::: -:::danger -An exception is thrown for [NaN and Inf](../data-types/float.md/#data_type-float-nan-inf) arguments. Keep in mind [numeric conversions issues](#common-issues-with-data-conversion), when using this function. -::: - **Example** Query: @@ -1135,7 +1196,7 @@ Result: ## toInt256 -Converts an input value to a value of type `Int256`. +Converts an input value to a value of type [`Int256`](../data-types/int-uint.md). Throws an exception in case of an error. **Syntax** @@ -1145,10 +1206,19 @@ toInt256(expr) **Arguments** -- `expr` — Expression returning a number or a string with the decimal representation of a number. [Expression](../syntax.md/#syntax-expressions). +- `expr` — Expression returning a number or a string representation of a number. [Expression](../syntax.md/#syntax-expressions). + +Supported types: +- (U)Int8/16/32/64/128/256 +- Float* +- String representations of (U)Int8/16/32/128/256 + +Unsupported types: +- Float values `NaN` and `Inf` throw an exception. 
+- String representations of binary and hexadecimal values, e.g. `SELECT toInt256('0xc0fe');`

 :::note
-Binary, octal, and hexadecimal representations of numbers are not supported. Leading zeroes are stripped.
+If the input value cannot be represented within the bounds of [Int256](../data-types/int-uint.md), the result over or under flows. This is not considered an error.
 :::

**Returned value**

- 256-bit integer value. [Int256](../data-types/int-uint.md).

 :::note
-Function uses [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning it truncates fractional digits of numbers.
-:::
-
-:::danger
-An exception is thrown for [NaN and Inf](../data-types/float.md/#data_type-float-nan-inf) arguments. Keep in mind [numeric conversions issues](#common-issues-with-data-conversion), when using this function.
+The function uses [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning it truncates fractional digits of numbers.
 :::

**Example**

@@ -1190,32 +1256,33 @@ Result:

## toInt256OrZero

-Like [`toInt256`](#toint256), takes an argument of type [String](../data-types/string.md) and tries to parse it to type `Int256`. If unsuccessful, returns `0`.
+Like [`toInt256`](#toint256), this function converts an input value to a value of type [Int256](../data-types/int-uint.md) but returns `0` in case of an error.

**Syntax**

```sql
-toInt256OrZero(expr)
+toInt256OrZero(x)
```

**Arguments**

-- `expr` — Expression returning a number or a string with the decimal representation of a number. [Expression](../syntax.md/#syntax-expressions) / [String](../data-types/string.md).
+- `x` — A String representation of a number. [String](../data-types/string.md).

-:::note
-Binary, octal, and hexadecimal representations of numbers are not supported. Leading zeroes are stripped.
-:::
+Supported types:
+- String representations of (U)Int8/16/32/128/256
+
+Types for which `0` is returned:
+- String representations of ordinary Float32/64 values.
+- String representations of Float values `NaN` and `Inf`.
+- String representations of binary and hexadecimal values, e.g. `SELECT toInt256OrZero('0xc0fe');`.
+- If the input value cannot be represented within the bounds of [Int256](../data-types/int-uint.md) and the result over or under flows.

**Returned value**

- 256-bit integer value if successful, otherwise `0`. [Int256](../data-types/int-uint.md).

 :::note
-Functions uses [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning it truncates fractional digits of numbers.
-:::
-
-:::danger
-An exception is thrown for [NaN and Inf](../data-types/float.md/#data_type-float-nan-inf) arguments. Keep in mind [numeric conversions issues](#common-issues-with-data-conversion), when using this function.
+The function uses [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning it truncates fractional digits of numbers.
 :::

**Example**

@@ -1244,32 +1311,33 @@ Result:

## toInt256OrNull

-Like [`toInt256`](#toint256), takes an argument of type [String](../data-types/string.md) and tries to parse it to type `Int256`. If unsuccessful, returns `NULL`.
+Like [`toInt256`](#toint256), takes an argument of type [String](../data-types/string.md) and tries to parse it to type [`Int256`](../data-types/int-uint.md). If unsuccessful, returns [`NULL`](../data-types/nullable.md).

**Syntax** ```sql -toInt256OrNull(expr) +toInt256OrNull(x) ``` **Arguments** -- `expr` — Expression returning a number or a string with the decimal representation of a number. [Expression](../syntax.md/#syntax-expressions) / [String](../data-types/string.md). +- `x` — A String representation of a number. [String](../data-types/string.md). -:::note -Binary, octal, and hexadecimal representations of numbers are not supported. Leading zeroes are stripped. -::: +Supported types: +- String representations of (U)Int8/16/32/128/256 + +Types for which `\N` is returned: +- String representations of ordinary Float32/64 values. +- String representations of Float values `NaN` and `Inf`. +- String representations of binary and hexadecimal values, e.g. `SELECT toInt256OrNull('0xc0fe');`. +- If the input value cannot be represented within the bounds of [Int256](../data-types/int-uint.md) and the result over or under flows. **Returned value** - 256-bit integer value if successful, otherwise `NULL`. [Int256](../data-types/int-uint.md) / [NULL](../data-types/nullable.md). :::note -Function uses [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning it truncates fractional digits of numbers. -::: - -:::danger -An exception is thrown for [NaN and Inf](../data-types/float.md/#data_type-float-nan-inf) arguments. Keep in mind [numeric conversions issues](#common-issues-with-data-conversion), when using this function. +The function uses [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning it truncates fractional digits of numbers. ::: **Example** @@ -1298,7 +1366,7 @@ Result: ## toInt256OrDefault -Like [`toInt256`](#toint256), takes an argument of type [String](../data-types/string.md) and tries to parse it to type `Int256`. If unsuccessful, returns the default type value. +Like [`toInt256`](#toint256), takes an argument of type [String](../data-types/string.md) and tries to parse it to type [`Int256`](../data-types/int-uint.md). If unsuccessful, returns the default type value. **Syntax** @@ -1308,26 +1376,28 @@ toInt256OrDefault(expr, def) **Arguments** -- `expr` — Expression returning a number or a string with the decimal representation of a number. [Expression](../syntax.md/#syntax-expressions) / [String](../data-types/string.md). +- `expr` — Expression returning a number or a string representation of a number. [Expression](../syntax.md/#syntax-expressions) / [String](../data-types/string.md). - `def` — The default value to return if parsing to type `Int256` is unsuccessful. [Int256](../data-types/int-uint.md). -:::note -Binary, octal, and hexadecimal representations of numbers are not supported. Leading zeroes are stripped. -::: +Supported types: +- (U)Int8/16/32/64/128/256 +- Float* +- String representations of (U)Int8/16/32/128/256 + +Types for which the default value is returned: +- Float values `NaN` and `Inf` return the default value. +- String representations of binary and hexadecimal values, e.g. `SELECT toInt128OrDefault('0xc0fe', CAST('-1', 'Int256'));` +- If the input value cannot be represented within the bounds of [Int256](../data-types/int-uint.md) and the result over or under flows. **Returned value** - 256-bit integer value if successful, otherwise returns the default value. [Int256](../data-types/int-uint.md). :::note -- Function uses [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning it truncates fractional digits of numbers. 
+- The function uses [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning it truncates fractional digits of numbers. - The default value type should be the same as the cast type. ::: -:::danger -An exception is thrown for [NaN and Inf](../data-types/float.md/#data_type-float-nan-inf) arguments. Keep in mind [numeric conversions issues](#common-issues-with-data-conversion), when using this function. -::: - **Example** Query: From 3c1004aee4a3b1f3e1b0bd91a1b02c6c9e16c832 Mon Sep 17 00:00:00 2001 From: Blargian Date: Thu, 25 Jul 2024 21:26:14 +0200 Subject: [PATCH 219/661] Fix typo --- .../functions/type-conversion-functions.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/en/sql-reference/functions/type-conversion-functions.md b/docs/en/sql-reference/functions/type-conversion-functions.md index 844d957d538..4326753216e 100644 --- a/docs/en/sql-reference/functions/type-conversion-functions.md +++ b/docs/en/sql-reference/functions/type-conversion-functions.md @@ -131,7 +131,7 @@ Types for which `0` is returned: - String representations of ordinary Float32/64 values. - String representations of Float values `NaN` and `Inf`. - String representations of binary and hexadecimal values, e.g. `SELECT toInt8OrZero('0xc0fe');`. -- If the input value cannot be represented within the bounds of [toInt16](../data-types/int-uint.md), and the result over or under flows. +- If the input value cannot be represented within the bounds of [Int8](../data-types/int-uint.md), and the result over or under flows. **Returned value** @@ -186,7 +186,7 @@ Types for which `\N` is returned: - String representations of ordinary Float32/64 values. - String representations of Float values `NaN` and `Inf`. - String representations of binary and hexadecimal values, e.g. `SELECT toInt8OrNull('0xc0fe');`. -- If the input value cannot be represented within the bounds of [Int16](../data-types/int-uint.md), and the result over or under flows. +- If the input value cannot be represented within the bounds of [Int8](../data-types/int-uint.md), and the result over or under flows. **Returned value** @@ -300,7 +300,7 @@ Unsupported types: - String representations of binary and hexadecimal values, e.g. `SELECT toInt16('0xc0fe');` :::note -If the input value cannot be represented within the bounds of [toInt16](../data-types/int-uint.md), the result over or under flows. This is not considered an error. +If the input value cannot be represented within the bounds of [Int16](../data-types/int-uint.md), the result over or under flows. This is not considered an error. For example: `SELECT toInt16(32768) == -32768;`, `SELECT toInt16(32768) == -32768;`, `SELECT toInt16('32768') == -32768;`. ::: @@ -527,7 +527,7 @@ Unsupported types: - String representations of binary and hexadecimal values, e.g. `SELECT toInt32('0xc0fe');` :::note -If the input value cannot be represented within the bounds of [toInt16](../data-types/int-uint.md), the result over or under flows. This is not considered an error. +If the input value cannot be represented within the bounds of [Int32](../data-types/int-uint.md), the result over or under flows. This is not considered an error. For example: ``` SELECT toInt32(2147483648) == -2147483648; @@ -755,7 +755,7 @@ Unsupported types: - String representations of binary and hexadecimal values, e.g. 
`SELECT toInt64('0xc0fe');` :::note -If the input value cannot be represented within the bounds of [Int16](../data-types/int-uint.md), the result over or under flows. This is not considered an error. +If the input value cannot be represented within the bounds of [Int64](../data-types/int-uint.md), the result over or under flows. This is not considered an error. For example: ``` @@ -1386,7 +1386,7 @@ Supported types: Types for which the default value is returned: - Float values `NaN` and `Inf` return the default value. -- String representations of binary and hexadecimal values, e.g. `SELECT toInt128OrDefault('0xc0fe', CAST('-1', 'Int256'));` +- String representations of binary and hexadecimal values, e.g. `SELECT toInt256OrDefault('0xc0fe', CAST('-1', 'Int256'));` - If the input value cannot be represented within the bounds of [Int256](../data-types/int-uint.md) and the result over or under flows. **Returned value** From f2e83f092d1f677c4e0240e749f96766ff6e205c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Thu, 25 Jul 2024 21:56:42 +0200 Subject: [PATCH 220/661] Patch getauxval for tsan re-exec --- base/glibc-compatibility/CMakeLists.txt | 10 ++++++++++ base/glibc-compatibility/musl/getauxval.c | 4 ++-- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/base/glibc-compatibility/CMakeLists.txt b/base/glibc-compatibility/CMakeLists.txt index c967fa5b11b..8948e25cb8e 100644 --- a/base/glibc-compatibility/CMakeLists.txt +++ b/base/glibc-compatibility/CMakeLists.txt @@ -18,6 +18,16 @@ if (GLIBC_COMPATIBILITY) message (FATAL_ERROR "glibc_compatibility can only be used on x86_64 or aarch64.") endif () + if (SANITIZE STREQUAL thread) + # Disable TSAN instrumentation that conflicts with re-exec due to high ASLR entropy using getauxval + # See longer comment in __auxv_init_procfs + # In the case of tsan we need to make sure getauxval is not instrumented as that would introduce tsan + # internal calls to functions that depend on a state that isn't initialized yet + set_source_files_properties( + musl/getauxval.c + PROPERTIES COMPILE_FLAGS "-mllvm -tsan-instrument-func-entry-exit=false") + endif() + # Need to omit frame pointers to match the performance of glibc set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fomit-frame-pointer") diff --git a/base/glibc-compatibility/musl/getauxval.c b/base/glibc-compatibility/musl/getauxval.c index 28cb0f8d005..ec2cce1e4aa 100644 --- a/base/glibc-compatibility/musl/getauxval.c +++ b/base/glibc-compatibility/musl/getauxval.c @@ -102,7 +102,7 @@ static unsigned long NO_SANITIZE_THREAD __auxv_init_procfs(unsigned long type) /// most global variables aren't initialized or available yet, so we can't initiate the auxiliary vector. /// Normal glibc / musl getauxval doesn't have this problem since they initiate their auxval vector at the very /// start of __libc_start_main (just keeping track of argv+argc+1), but we don't have such option (otherwise - // this complexity of reading "/proc/self/auxv" or using __environ would not be necessary). + /// this complexity of reading "/proc/self/auxv" or using __environ would not be necessary). 
/// To avoid this crashes on the re-exec call (see above how it would fail when creating `aux`, and if we used /// __auxv_init_environ then it would SIGSEV on READing `__environ`) we capture this call for `AT_EXECFN` and @@ -237,7 +237,7 @@ static unsigned long NO_SANITIZE_THREAD __auxv_init_environ(unsigned long type) // - __auxv_init_procfs -> __auxv_init_environ -> __getauxval_environ static void * volatile getauxval_func = (void *)__auxv_init_procfs; -unsigned long getauxval(unsigned long type) +unsigned long NO_SANITIZE_THREAD getauxval(unsigned long type) { return ((unsigned long (*)(unsigned long))getauxval_func)(type); } From 3f70977cd660e4617d9bbd68cc229020adc57f98 Mon Sep 17 00:00:00 2001 From: jsc0218 Date: Thu, 25 Jul 2024 21:02:30 +0000 Subject: [PATCH 221/661] try to fix --- ...2572_query_views_log_background_thread.reference | 13 +++++++++---- .../02572_query_views_log_background_thread.sql | 8 ++++++-- 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/tests/queries/0_stateless/02572_query_views_log_background_thread.reference b/tests/queries/0_stateless/02572_query_views_log_background_thread.reference index 22dfaf93781..f867fd0d085 100644 --- a/tests/queries/0_stateless/02572_query_views_log_background_thread.reference +++ b/tests/queries/0_stateless/02572_query_views_log_background_thread.reference @@ -1,15 +1,14 @@ -- { echoOn } insert into buffer_02572 values (1); -- ensure that the flush was not direct +select * from buffer_02572; +1 select * from data_02572; select * from copy_02572; -- we cannot use OPTIMIZE, this will attach query context, so let's wait SET function_sleep_max_microseconds_per_block = 6000000; select sleepEachRow(1) from numbers(3*2) format Null; -select * from data_02572; -1 -select * from copy_02572; -1 +select sleepEachRow(1) from numbers(3*2) format Null; system flush logs; select count() > 0, lower(status::String), errorCodeToName(exception_code) from system.query_views_log where @@ -18,3 +17,9 @@ select count() > 0, lower(status::String), errorCodeToName(exception_code) group by 2, 3 ; 1 queryfinish OK +select * from buffer_02572; +1 +select * from data_02572; +1 +select * from copy_02572; +1 diff --git a/tests/queries/0_stateless/02572_query_views_log_background_thread.sql b/tests/queries/0_stateless/02572_query_views_log_background_thread.sql index 939c189c5fe..2e9a62b71da 100644 --- a/tests/queries/0_stateless/02572_query_views_log_background_thread.sql +++ b/tests/queries/0_stateless/02572_query_views_log_background_thread.sql @@ -19,13 +19,13 @@ create materialized view mv_02572 to copy_02572 as select * from data_02572; -- { echoOn } insert into buffer_02572 values (1); -- ensure that the flush was not direct +select * from buffer_02572; select * from data_02572; select * from copy_02572; -- we cannot use OPTIMIZE, this will attach query context, so let's wait SET function_sleep_max_microseconds_per_block = 6000000; select sleepEachRow(1) from numbers(3*2) format Null; -select * from data_02572; -select * from copy_02572; +select sleepEachRow(1) from numbers(3*2) format Null; system flush logs; select count() > 0, lower(status::String), errorCodeToName(exception_code) @@ -34,3 +34,7 @@ select count() > 0, lower(status::String), errorCodeToName(exception_code) view_target = concatWithSeparator('.', currentDatabase(), 'copy_02572') group by 2, 3 ; + +select * from buffer_02572; +select * from data_02572; +select * from copy_02572; \ No newline at end of file From f0faa111d73c8512c1f88009f0ecfd1a804de45c Mon Sep 17 00:00:00 2001 
From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Thu, 25 Jul 2024 23:19:58 +0200 Subject: [PATCH 222/661] Fix wrong usage of input_format_max_bytes_to_read_for_schema_inference --- src/Formats/FormatFactory.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index 1271cdfb7ad..e8956159714 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -253,7 +253,7 @@ FormatSettings getFormatSettings(const ContextPtr & context, const Settings & se format_settings.msgpack.number_of_columns = settings.input_format_msgpack_number_of_columns; format_settings.msgpack.output_uuid_representation = settings.output_format_msgpack_uuid_representation; format_settings.max_rows_to_read_for_schema_inference = settings.input_format_max_rows_to_read_for_schema_inference; - format_settings.max_bytes_to_read_for_schema_inference = settings.input_format_max_rows_to_read_for_schema_inference; + format_settings.max_bytes_to_read_for_schema_inference = settings.input_format_max_bytes_to_read_for_schema_inference; format_settings.column_names_for_schema_inference = settings.column_names_for_schema_inference; format_settings.schema_inference_hints = settings.schema_inference_hints; format_settings.schema_inference_make_columns_nullable = settings.schema_inference_make_columns_nullable; From 18fb7396f941fd5a7e3872788ab07a18731dc943 Mon Sep 17 00:00:00 2001 From: xc0derx <11428624+xc0derx@users.noreply.github.com> Date: Thu, 25 Jul 2024 23:21:30 +0200 Subject: [PATCH 223/661] fix broken links (compression codecs) --- docs/en/sql-reference/statements/alter/column.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/en/sql-reference/statements/alter/column.md b/docs/en/sql-reference/statements/alter/column.md index aa6f132e08e..2e9b0cf3080 100644 --- a/docs/en/sql-reference/statements/alter/column.md +++ b/docs/en/sql-reference/statements/alter/column.md @@ -36,7 +36,7 @@ These actions are described in detail below. ADD COLUMN [IF NOT EXISTS] name [type] [default_expr] [codec] [AFTER name_after | FIRST] ``` -Adds a new column to the table with the specified `name`, `type`, [`codec`](../create/table.md/#codecs) and `default_expr` (see the section [Default expressions](/docs/en/sql-reference/statements/create/table.md/#create-default-values)). +Adds a new column to the table with the specified `name`, `type`, [`codec`](../create/table.md/#column_compression_codec) and `default_expr` (see the section [Default expressions](/docs/en/sql-reference/statements/create/table.md/#create-default-values)). If the `IF NOT EXISTS` clause is included, the query won’t return an error if the column already exists. If you specify `AFTER name_after` (the name of another column), the column is added after the specified one in the list of table columns. If you want to add a column to the beginning of the table use the `FIRST` clause. Otherwise, the column is added to the end of the table. For a chain of actions, `name_after` can be the name of a column that is added in one of the previous actions. @@ -155,7 +155,7 @@ This query changes the `name` column properties: - Column-level Settings -For examples of columns compression CODECS modifying, see [Column Compression Codecs](../create/table.md/#codecs). +For examples of columns compression CODECS modifying, see [Column Compression Codecs](../create/table.md/#column_compression_codec). 
For examples of columns TTL modifying, see [Column TTL](/docs/en/engines/table-engines/mergetree-family/mergetree.md/#mergetree-column-ttl). From d0c4c4151c5e4bcb86b9417f4ab8cc71316404b5 Mon Sep 17 00:00:00 2001 From: Shri Bodas Date: Thu, 25 Jul 2024 14:24:28 -0700 Subject: [PATCH 224/661] Update keepermap.md Needs quotes around keeper path --- docs/en/engines/table-engines/special/keepermap.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/engines/table-engines/special/keepermap.md b/docs/en/engines/table-engines/special/keepermap.md index 5559cc2c648..04a9a4b0d4e 100644 --- a/docs/en/engines/table-engines/special/keepermap.md +++ b/docs/en/engines/table-engines/special/keepermap.md @@ -54,7 +54,7 @@ CREATE TABLE keeper_map_table `v2` String, `v3` Float32 ) -ENGINE = KeeperMap(/keeper_map_table, 4) +ENGINE = KeeperMap('/keeper_map_table', 4) PRIMARY KEY key ``` From 321766d0b8161a794f11835b4650d30b3723835b Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Thu, 25 Jul 2024 22:51:14 +0000 Subject: [PATCH 225/661] Automatic style fix --- tests/performance/scripts/perf.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/performance/scripts/perf.py b/tests/performance/scripts/perf.py index f89784a0e0b..83d66997677 100755 --- a/tests/performance/scripts/perf.py +++ b/tests/performance/scripts/perf.py @@ -349,9 +349,7 @@ for query_index in queries_to_run: try: c.execute("SYSTEM JEMALLOC PURGE") - print( - f"purging jemalloc arenas\t{conn_index}\t{c.last_query.elapsed}" - ) + print(f"purging jemalloc arenas\t{conn_index}\t{c.last_query.elapsed}") except KeyboardInterrupt: raise except: From f3c88ff66707a50523ccef6e964f2fe78a711ace Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 26 Jul 2024 03:56:02 +0200 Subject: [PATCH 226/661] Fix benign data race in ZooKeeper --- src/Common/ZooKeeper/IKeeper.h | 2 +- src/Common/ZooKeeper/TestKeeper.h | 2 +- src/Common/ZooKeeper/ZooKeeper.cpp | 27 ++++++++--------- src/Common/ZooKeeper/ZooKeeper.h | 2 +- src/Common/ZooKeeper/ZooKeeperImpl.cpp | 29 +++++++++++++++++-- src/Common/ZooKeeper/ZooKeeperImpl.h | 9 +++--- .../StorageSystemZooKeeperConnection.cpp | 10 +++++-- 7 files changed, 54 insertions(+), 27 deletions(-) diff --git a/src/Common/ZooKeeper/IKeeper.h b/src/Common/ZooKeeper/IKeeper.h index 2c6cbc4a5d5..ce7489a33e5 100644 --- a/src/Common/ZooKeeper/IKeeper.h +++ b/src/Common/ZooKeeper/IKeeper.h @@ -548,7 +548,7 @@ public: virtual bool isExpired() const = 0; /// Get the current connected node idx. - virtual Int8 getConnectedNodeIdx() const = 0; + virtual std::optional getConnectedNodeIdx() const = 0; /// Get the current connected host and port. 
virtual String getConnectedHostPort() const = 0; diff --git a/src/Common/ZooKeeper/TestKeeper.h b/src/Common/ZooKeeper/TestKeeper.h index 2194ad015bf..562c313ac0e 100644 --- a/src/Common/ZooKeeper/TestKeeper.h +++ b/src/Common/ZooKeeper/TestKeeper.h @@ -39,7 +39,7 @@ public: ~TestKeeper() override; bool isExpired() const override { return expired; } - Int8 getConnectedNodeIdx() const override { return 0; } + std::optional getConnectedNodeIdx() const override { return 0; } String getConnectedHostPort() const override { return "TestKeeper:0000"; } int32_t getConnectionXid() const override { return 0; } int64_t getSessionID() const override { return 0; } diff --git a/src/Common/ZooKeeper/ZooKeeper.cpp b/src/Common/ZooKeeper/ZooKeeper.cpp index 01bb508da95..1250e1273b9 100644 --- a/src/Common/ZooKeeper/ZooKeeper.cpp +++ b/src/Common/ZooKeeper/ZooKeeper.cpp @@ -128,16 +128,15 @@ void ZooKeeper::init(ZooKeeperArgs args_, std::unique_ptr ShuffleHosts shuffled_hosts = shuffleHosts(); impl = std::make_unique(shuffled_hosts, args, zk_log); - Int8 node_idx = impl->getConnectedNodeIdx(); + auto node_idx = impl->getConnectedNodeIdx(); if (args.chroot.empty()) LOG_TRACE(log, "Initialized, hosts: {}", fmt::join(args.hosts, ",")); else LOG_TRACE(log, "Initialized, hosts: {}, chroot: {}", fmt::join(args.hosts, ","), args.chroot); - /// If the balancing strategy has an optimal node then it will be the first in the list - bool connected_to_suboptimal_node = node_idx != shuffled_hosts[0].original_index; + bool connected_to_suboptimal_node = node_idx && *node_idx != shuffled_hosts[0].original_index; bool respect_az = args.prefer_local_availability_zone && !args.client_availability_zone.empty(); bool may_benefit_from_reconnecting = respect_az || args.get_priority_load_balancing.hasOptimalNode(); if (connected_to_suboptimal_node && may_benefit_from_reconnecting) @@ -145,7 +144,7 @@ void ZooKeeper::init(ZooKeeperArgs args_, std::unique_ptr auto reconnect_timeout_sec = getSecondsUntilReconnect(args); LOG_DEBUG(log, "Connected to a suboptimal ZooKeeper host ({}, index {})." 
" To preserve balance in ZooKeeper usage, this ZooKeeper session will expire in {} seconds", - impl->getConnectedHostPort(), node_idx, reconnect_timeout_sec); + impl->getConnectedHostPort(), *node_idx, reconnect_timeout_sec); auto reconnect_task_holder = DB::Context::getGlobalContextInstance()->getSchedulePool().createTask("ZKReconnect", [this, optimal_host = shuffled_hosts[0]]() { @@ -154,13 +153,15 @@ void ZooKeeper::init(ZooKeeperArgs args_, std::unique_ptr LOG_DEBUG(log, "Trying to connect to a more optimal node {}", optimal_host.host); ShuffleHosts node{optimal_host}; std::unique_ptr new_impl = std::make_unique(node, args, zk_log); - Int8 new_node_idx = new_impl->getConnectedNodeIdx(); - /// Maybe the node was unavailable when getting AZs first time, update just in case - if (args.availability_zone_autodetect && availability_zones[new_node_idx].empty()) + if (auto new_node_idx = new_impl->getConnectedNodeIdx(); new_node_idx) { - availability_zones[new_node_idx] = new_impl->tryGetAvailabilityZone(); - LOG_DEBUG(log, "Got availability zone for {}: {}", optimal_host.host, availability_zones[new_node_idx]); + /// Maybe the node was unavailable when getting AZs first time, update just in case + if (args.availability_zone_autodetect && availability_zones[*new_node_idx].empty()) + { + availability_zones[*new_node_idx] = new_impl->tryGetAvailabilityZone(); + LOG_DEBUG(log, "Got availability zone for {}: {}", optimal_host.host, availability_zones[*new_node_idx]); + } } optimal_impl = std::move(new_impl); @@ -1525,7 +1526,7 @@ void ZooKeeper::setServerCompletelyStarted() zk->setServerCompletelyStarted(); } -Int8 ZooKeeper::getConnectedHostIdx() const +std::optional ZooKeeper::getConnectedHostIdx() const { return impl->getConnectedNodeIdx(); } @@ -1544,10 +1545,10 @@ String ZooKeeper::getConnectedHostAvailabilityZone() const { if (args.implementation != "zookeeper" || !impl) return ""; - Int8 idx = impl->getConnectedNodeIdx(); - if (idx < 0) + std::optional idx = impl->getConnectedNodeIdx(); + if (!idx) return ""; /// session expired - return availability_zones.at(idx); + return availability_zones.at(*idx); } size_t getFailedOpIndex(Coordination::Error exception_code, const Coordination::Responses & responses) diff --git a/src/Common/ZooKeeper/ZooKeeper.h b/src/Common/ZooKeeper/ZooKeeper.h index 4ae2cfa6096..657c9cb2c03 100644 --- a/src/Common/ZooKeeper/ZooKeeper.h +++ b/src/Common/ZooKeeper/ZooKeeper.h @@ -620,7 +620,7 @@ public: void setServerCompletelyStarted(); - Int8 getConnectedHostIdx() const; + std::optional getConnectedHostIdx() const; String getConnectedHostPort() const; int32_t getConnectionXid() const; diff --git a/src/Common/ZooKeeper/ZooKeeperImpl.cpp b/src/Common/ZooKeeper/ZooKeeperImpl.cpp index 2728f953bea..53c7a5728aa 100644 --- a/src/Common/ZooKeeper/ZooKeeperImpl.cpp +++ b/src/Common/ZooKeeper/ZooKeeperImpl.cpp @@ -536,7 +536,7 @@ void ZooKeeper::connect( compressed_out.emplace(*out, CompressionCodecFactory::instance().get("LZ4", {})); } - original_index = static_cast(node.original_index); + original_index.store(node.original_index); break; } catch (...) @@ -1014,8 +1014,7 @@ void ZooKeeper::finalize(bool error_send, bool error_receive, const String & rea LOG_INFO(log, "Finalizing session {}. finalization_started: {}, queue_finished: {}, reason: '{}'", session_id, already_started, requests_queue.isFinished(), reason); - /// Reset the original index. 
- original_index = -1; + original_index.store(-1); auto expire_session_if_not_expired = [&] { @@ -1534,6 +1533,30 @@ void ZooKeeper::close() } +std::optional ZooKeeper::getConnectedNodeIdx() const +{ + int8_t res = original_index.load(); + if (res == -1) + return std::nullopt; + else + return res; +} + +String ZooKeeper::getConnectedHostPort() const +{ + auto idx = getConnectedNodeIdx(); + if (idx) + return args.hosts[*idx]; + else + return ""; +} + +int32_t ZooKeeper::getConnectionXid() const +{ + return next_xid.load(); +} + + void ZooKeeper::setZooKeeperLog(std::shared_ptr zk_log_) { /// logOperationIfNeeded(...) uses zk_log and can be called from different threads, so we have to use atomic shared_ptr diff --git a/src/Common/ZooKeeper/ZooKeeperImpl.h b/src/Common/ZooKeeper/ZooKeeperImpl.h index 0c88c35b381..39082cd14c1 100644 --- a/src/Common/ZooKeeper/ZooKeeperImpl.h +++ b/src/Common/ZooKeeper/ZooKeeperImpl.h @@ -114,13 +114,12 @@ public: ~ZooKeeper() override; - /// If expired, you can only destroy the object. All other methods will throw exception. bool isExpired() const override { return requests_queue.isFinished(); } - Int8 getConnectedNodeIdx() const override { return original_index; } - String getConnectedHostPort() const override { return (original_index == -1) ? "" : args.hosts[original_index]; } - int32_t getConnectionXid() const override { return next_xid.load(); } + std::optional getConnectedNodeIdx() const override; + String getConnectedHostPort() const override; + int32_t getConnectionXid() const override; String tryGetAvailabilityZone() override; @@ -219,7 +218,7 @@ private: ACLs default_acls; zkutil::ZooKeeperArgs args; - Int8 original_index = -1; + std::atomic original_index{-1}; /// Fault injection void maybeInjectSendFault(); diff --git a/src/Storages/System/StorageSystemZooKeeperConnection.cpp b/src/Storages/System/StorageSystemZooKeeperConnection.cpp index ec29b84dac3..72a7ba38429 100644 --- a/src/Storages/System/StorageSystemZooKeeperConnection.cpp +++ b/src/Storages/System/StorageSystemZooKeeperConnection.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -27,7 +28,7 @@ ColumnsDescription StorageSystemZooKeeperConnection::getColumnsDescription() /* 0 */ {"name", std::make_shared(), "ZooKeeper cluster's name."}, /* 1 */ {"host", std::make_shared(), "The hostname/IP of the ZooKeeper node that ClickHouse connected to."}, /* 2 */ {"port", std::make_shared(), "The port of the ZooKeeper node that ClickHouse connected to."}, - /* 3 */ {"index", std::make_shared(), "The index of the ZooKeeper node that ClickHouse connected to. The index is from ZooKeeper config."}, + /* 3 */ {"index", std::make_shared(std::make_shared()), "The index of the ZooKeeper node that ClickHouse connected to. The index is from ZooKeeper config. If not connected, this column is NULL."}, /* 4 */ {"connected_time", std::make_shared(), "When the connection was established."}, /* 5 */ {"session_uptime_elapsed_seconds", std::make_shared(), "Seconds elapsed since the connection was established."}, /* 6 */ {"is_expired", std::make_shared(), "Is the current connection expired."}, @@ -64,7 +65,7 @@ void StorageSystemZooKeeperConnection::fillData(MutableColumns & res_columns, Co /// For read-only snapshot type functionality, it's acceptable even though 'getZooKeeper' may cause data inconsistency. 
auto fill_data = [&](const String & name, const zkutil::ZooKeeperPtr zookeeper, MutableColumns & columns) { - Int8 index = zookeeper->getConnectedHostIdx(); + auto index = zookeeper->getConnectedHostIdx(); String host_port = zookeeper->getConnectedHostPort(); if (index != -1 && !host_port.empty()) { @@ -78,7 +79,10 @@ void StorageSystemZooKeeperConnection::fillData(MutableColumns & res_columns, Co columns[0]->insert(name); columns[1]->insert(host); columns[2]->insert(port); - columns[3]->insert(index); + if (index) + columns[3]->insert(*index); + else + columns[3]->insertDefault(); columns[4]->insert(connected_time); columns[5]->insert(uptime); columns[6]->insert(zookeeper->expired()); From d6fdf29679ece887567cba6fa43aee4c22c7d6f7 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 26 Jul 2024 04:48:41 +0200 Subject: [PATCH 227/661] Remove too long unit test --- .../tests/gtest_archive_reader_and_writer.cpp | 42 ------------------- 1 file changed, 42 deletions(-) diff --git a/src/IO/tests/gtest_archive_reader_and_writer.cpp b/src/IO/tests/gtest_archive_reader_and_writer.cpp index 898c7017e7d..06f8f53546b 100644 --- a/src/IO/tests/gtest_archive_reader_and_writer.cpp +++ b/src/IO/tests/gtest_archive_reader_and_writer.cpp @@ -492,48 +492,6 @@ TEST_P(ArchiveReaderAndWriterTest, ManyFilesOnDisk) } } -TEST_P(ArchiveReaderAndWriterTest, LargeFile) -{ - /// Make an archive. - std::string_view contents = "The contents of a.txt\n"; - int times = 10000000; - { - auto writer = createArchiveWriter(getPathToArchive()); - { - auto out = writer->writeFile("a.txt", times * contents.size()); - for (int i = 0; i < times; i++) - writeString(contents, *out); - out->finalize(); - } - writer->finalize(); - } - - /// Read the archive. - auto reader = createArchiveReader(getPathToArchive()); - - ASSERT_TRUE(reader->fileExists("a.txt")); - - auto file_info = reader->getFileInfo("a.txt"); - EXPECT_EQ(file_info.uncompressed_size, contents.size() * times); - EXPECT_GT(file_info.compressed_size, 0); - - { - auto in = reader->readFile("a.txt", /*throw_on_not_found=*/true); - for (int i = 0; i < times; i++) - ASSERT_TRUE(checkString(String(contents), *in)); - } - - { - /// Use an enumerator. 
- auto enumerator = reader->firstFile(); - ASSERT_NE(enumerator, nullptr); - EXPECT_EQ(enumerator->getFileName(), "a.txt"); - EXPECT_EQ(enumerator->getFileInfo().uncompressed_size, contents.size() * times); - EXPECT_GT(enumerator->getFileInfo().compressed_size, 0); - EXPECT_FALSE(enumerator->nextFile()); - } -} - TEST(TarArchiveReaderTest, FileExists) { String archive_path = "archive.tar"; From 9c6026965d985ca0ffcf0ab789d09946bd37c569 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 26 Jul 2024 04:55:53 +0200 Subject: [PATCH 228/661] Fix error --- src/IO/ReadWriteBufferFromHTTP.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/IO/ReadWriteBufferFromHTTP.cpp b/src/IO/ReadWriteBufferFromHTTP.cpp index 17a5ed385d4..a62f22d4bd9 100644 --- a/src/IO/ReadWriteBufferFromHTTP.cpp +++ b/src/IO/ReadWriteBufferFromHTTP.cpp @@ -700,6 +700,14 @@ std::optional ReadWriteBufferFromHTTP::tryGetLastModificationTime() { return std::nullopt; } + catch (const NetException &) + { + return std::nullopt; + } + catch (const Poco::Net::NetException &) + { + return std::nullopt; + } } return file_info->last_modified; From 64ff5d7bc443cdb15fd0a5eec391d449a617b3f9 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 26 Jul 2024 05:07:56 +0200 Subject: [PATCH 229/661] Fix `00705_drop_create_merge_tree` --- tests/queries/0_stateless/00705_drop_create_merge_tree.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/00705_drop_create_merge_tree.sh b/tests/queries/0_stateless/00705_drop_create_merge_tree.sh index d7754091290..ea8b9d02e49 100755 --- a/tests/queries/0_stateless/00705_drop_create_merge_tree.sh +++ b/tests/queries/0_stateless/00705_drop_create_merge_tree.sh @@ -5,8 +5,8 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh -yes 'CREATE TABLE IF NOT EXISTS table (x UInt8) ENGINE = MergeTree ORDER BY tuple();' | head -n 1000 | $CLICKHOUSE_CLIENT --ignore-error -nm 2>/dev/null & -yes 'DROP TABLE table;' | head -n 1000 | $CLICKHOUSE_CLIENT --ignore-error -nm 2>/dev/null & +yes 'CREATE TABLE IF NOT EXISTS table (x UInt8) ENGINE = MergeTree ORDER BY tuple();' | head -n 1000 | $CLICKHOUSE_CLIENT --multiquery & +yes 'DROP TABLE IF EXISTS table;' | head -n 1000 | $CLICKHOUSE_CLIENT --multiquery & wait ${CLICKHOUSE_CLIENT} --query "DROP TABLE IF EXISTS table" From 287cce7d211b9386895a4fa898f87405eccb3e96 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Fri, 26 Jul 2024 09:20:15 +0200 Subject: [PATCH 230/661] Fixes --- .gitmodules | 2 +- contrib/numactl | 2 +- docker/test/performance-comparison/run.sh | 1 + programs/server/Server.cpp | 9 ++++++++- 4 files changed, 11 insertions(+), 3 deletions(-) diff --git a/.gitmodules b/.gitmodules index b5d7e1e56b3..7e0b4df4ad1 100644 --- a/.gitmodules +++ b/.gitmodules @@ -374,4 +374,4 @@ url = https://github.com/ClickHouse/double-conversion.git [submodule "contrib/numactl"] path = contrib/numactl - url = https://github.com/numactl/numactl.git + url = https://github.com/ClickHouse/numactl.git diff --git a/contrib/numactl b/contrib/numactl index 3871b1c42fc..8d13d63a05f 160000 --- a/contrib/numactl +++ b/contrib/numactl @@ -1 +1 @@ -Subproject commit 3871b1c42fc71bceadafd745d2eff5dddfc2d67e +Subproject commit 8d13d63a05f0c3cd88bf777cbb61541202b7da08 diff --git a/docker/test/performance-comparison/run.sh b/docker/test/performance-comparison/run.sh index 7afb5da59b1..6ef781fa4c8 100644 --- a/docker/test/performance-comparison/run.sh +++ b/docker/test/performance-comparison/run.sh @@ -13,6 +13,7 @@ entry="/usr/share/clickhouse-test/performance/scripts/entrypoint.sh" # https://www.kernel.org/doc/Documentation/filesystems/tmpfs.txt # Double-escaped backslashes are a tribute to the engineering wonder of docker -- # it gives '/bin/sh: 1: [bash,: not found' otherwise. +numactl --hardware node=$(( RANDOM % $(numactl --hardware | sed -n 's/^.*available:\(.*\)nodes.*$/\1/p') )); echo Will bind to NUMA node $node; numactl --cpunodebind=$node --membind=$node $entry diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index b9a7c298f00..b818ff1f3a2 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -775,9 +775,16 @@ try LOG_INFO( log, - "ClickHouse is bound to a subset of NUMA nodes. Total memory of all available nodes {}", + "ClickHouse is bound to a subset of NUMA nodes. Total memory of all available nodes: {}", ReadableSize(total_numa_memory)); } + else + { + LOG_TRACE( + log, + "All NUMA nodes are used. 
Detected NUMA nodes: {}", + numa_num_configured_nodes()); + } numa_bitmask_free(membind); } From 400f8e5b2116ab585312e578eee4d783b9d6783b Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Fri, 26 Jul 2024 09:33:46 +0200 Subject: [PATCH 231/661] Fix stacktrace cache --- src/Common/StackTrace.cpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/Common/StackTrace.cpp b/src/Common/StackTrace.cpp index 59a58ac027a..ff8765c9727 100644 --- a/src/Common/StackTrace.cpp +++ b/src/Common/StackTrace.cpp @@ -495,18 +495,19 @@ using StackTraceCacheBase = std::map Date: Fri, 26 Jul 2024 15:38:26 +0800 Subject: [PATCH 232/661] support set orc reader time zone name --- src/Core/Settings.h | 2 +- src/Core/SettingsChangesHistory.cpp | 2 +- src/Formats/FormatFactory.cpp | 2 +- src/Formats/FormatSettings.h | 2 +- .../Formats/Impl/NativeORCBlockInputFormat.cpp | 6 +----- tests/queries/0_stateless/03198_orc_read_time_zone.sh | 10 +++++----- 6 files changed, 10 insertions(+), 14 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 2a665326afc..a5220c3017c 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -1073,7 +1073,7 @@ class IColumn; M(Bool, input_format_orc_allow_missing_columns, true, "Allow missing columns while reading ORC input formats", 0) \ M(Bool, input_format_orc_use_fast_decoder, true, "Use a faster ORC decoder implementation.", 0) \ M(Bool, input_format_orc_filter_push_down, true, "When reading ORC files, skip whole stripes or row groups based on the WHERE/PREWHERE expressions, min/max statistics or bloom filter in the ORC metadata.", 0) \ - M(Bool, input_format_orc_read_use_writer_time_zone, false, "Whether use the writer's time zone in ORC stripe for ORC row reader, the default ORC row reader's time zone is GMT.", 0) \ + M(String, input_format_orc_reader_time_zone_name, "GMT", "The time zone name for ORC row reader, the default ORC row reader's time zone is GMT.", 0) \ M(Bool, input_format_parquet_allow_missing_columns, true, "Allow missing columns while reading Parquet input formats", 0) \ M(UInt64, input_format_parquet_local_file_min_bytes_for_seek, 8192, "Min bytes required for local read (file) to do seek, instead of read with ignore in Parquet input format", 0) \ M(Bool, input_format_arrow_allow_missing_columns, true, "Allow missing columns while reading Arrow input formats", 0) \ diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index acd119c159b..457caa76bb6 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -69,7 +69,7 @@ static std::initializer_listgetWriterTimezone(); - row_reader_options.setTimezoneName(writer_time_zone); - } + row_reader_options.setTimezoneName(format_settings.orc.reader_time_zone_name); row_reader_options.range(current_stripe_info->getOffset(), current_stripe_info->getLength()); if (format_settings.orc.filter_push_down && sarg) { diff --git a/tests/queries/0_stateless/03198_orc_read_time_zone.sh b/tests/queries/0_stateless/03198_orc_read_time_zone.sh index 27530c06237..7e931e16e48 100755 --- a/tests/queries/0_stateless/03198_orc_read_time_zone.sh +++ b/tests/queries/0_stateless/03198_orc_read_time_zone.sh @@ -5,8 +5,8 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh -$CLICKHOUSE_CLIENT -q "drop table if exists test" -$CLICKHOUSE_CLIENT -q "create table test(id UInt64, t DateTime64) Engine=MergeTree order by id" -$CLICKHOUSE_CLIENT -q "insert into test from infile '$CURDIR/data_orc/test_reader_time_zone.snappy.orc' SETTINGS input_format_orc_read_use_writer_time_zone=true FORMAT ORC" -$CLICKHOUSE_CLIENT -q "select * from test SETTINGS session_timezone='Asia/Shanghai'" -$CLICKHOUSE_CLIENT -q "drop table test" \ No newline at end of file +$CLICKHOUSE_CLIENT -q "drop table if exists test_orc_read_timezone" +$CLICKHOUSE_CLIENT -q "create table test_orc_read_timezone(id UInt64, t DateTime64) Engine=MergeTree order by id" +$CLICKHOUSE_CLIENT -q "insert into test_orc_read_timezone from infile '$CURDIR/data_orc/test_reader_time_zone.snappy.orc' SETTINGS input_format_orc_reader_time_zone_name='Asia/Shanghai' FORMAT ORC" +$CLICKHOUSE_CLIENT -q "select * from test_orc_read_timezone" +$CLICKHOUSE_CLIENT -q "drop table test_orc_read_timezone" \ No newline at end of file From c66a9f2d365e101798b2eee6cde7acc903a2fb46 Mon Sep 17 00:00:00 2001 From: Konstantin Bogdanov Date: Fri, 26 Jul 2024 09:39:31 +0200 Subject: [PATCH 233/661] Fix --- .../01676_clickhouse_client_autocomplete.python | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.python b/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.python index 9072dfeb09f..0f35d259c7c 100644 --- a/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.python +++ b/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.python @@ -59,13 +59,14 @@ def test_completion(program, argv, comp_word): output = output_b.decode() debug_log_fd.write(repr(output_b) + "\n") debug_log_fd.flush() - # fail fast if there is a bell character in the output, - # meaning no concise completion is found - if "\x07" in output: - print(f"{comp_word}: FAIL") - return while not comp_word in output: + # fail fast if there is a bell character in the output, + # meaning no concise completion is found + if "\x07" in output: + print(f"{comp_word}: FAIL") + return + output_b = os.read(master, 4096) output += output_b.decode() debug_log_fd.write(repr(output_b) + "\n") From 83dba7194f3467dc0f6e5499d65bda8a66fa8206 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Fri, 26 Jul 2024 09:55:09 +0200 Subject: [PATCH 234/661] Add deserialization of empty state --- .../03208_groupArrayIntersect_serialization.reference | 1 + .../0_stateless/03208_groupArrayIntersect_serialization.sql | 2 ++ 2 files changed, 3 insertions(+) diff --git a/tests/queries/0_stateless/03208_groupArrayIntersect_serialization.reference b/tests/queries/0_stateless/03208_groupArrayIntersect_serialization.reference index c3b6e0cd5b7..e84856c90fd 100644 --- a/tests/queries/0_stateless/03208_groupArrayIntersect_serialization.reference +++ b/tests/queries/0_stateless/03208_groupArrayIntersect_serialization.reference @@ -10,3 +10,4 @@ a [(['2','4','6','8','10'])] b [(['2','4','6','8','10'])] c [(['2','4','6','8','10'])] d [] +e [] diff --git a/tests/queries/0_stateless/03208_groupArrayIntersect_serialization.sql b/tests/queries/0_stateless/03208_groupArrayIntersect_serialization.sql index e05f78a4051..1b3d48ce0c3 100644 --- a/tests/queries/0_stateless/03208_groupArrayIntersect_serialization.sql +++ b/tests/queries/0_stateless/03208_groupArrayIntersect_serialization.sql @@ -39,3 +39,5 @@ INSERT INTO grouparray_string Select 
groupArrayIntersectState([tuple(['2', '4', SELECT 'c', arraySort(groupArrayIntersectMerge(v)) FROM grouparray_string; INSERT INTO grouparray_string Select groupArrayIntersectState([tuple(['2', '4', '6', '8', '10', '20']), tuple(['2', '4', '6', '8', '10', '14'])]); SELECT 'd', arraySort(groupArrayIntersectMerge(v)) FROM grouparray_string; +INSERT INTO grouparray_string Select groupArrayIntersectState([]::Array(Tuple(Array(String)))); +SELECT 'e', arraySort(groupArrayIntersectMerge(v)) FROM grouparray_string; From ad4e807cf4bb3633616b01e3616844fe2108d59f Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 26 Jul 2024 08:12:01 +0000 Subject: [PATCH 235/661] Fix stupid crash. --- src/Interpreters/InterpreterSelectQuery.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 4fd6f7a2900..41306a79198 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -2055,9 +2055,9 @@ void InterpreterSelectQuery::addEmptySourceToQueryPlan(QueryPlan & query_plan, c { auto & prewhere_info = *query_info.prewhere_info; - auto row_level_actions = std::make_shared(prewhere_info.row_level_filter->clone()); if (prewhere_info.row_level_filter) { + auto row_level_actions = std::make_shared(prewhere_info.row_level_filter->clone()); pipe.addSimpleTransform([&](const Block & header) { return std::make_shared(header, From de84f4f045f5ece627ca8295a09a5f2cf1eab6aa Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Fri, 26 Jul 2024 08:13:21 +0000 Subject: [PATCH 236/661] add proper cast to lagInFrame/leadInFrame --- src/Processors/Transforms/WindowTransform.cpp | 86 ++++++++++++++++--- src/Processors/Transforms/WindowTransform.h | 1 + 2 files changed, 75 insertions(+), 12 deletions(-) diff --git a/src/Processors/Transforms/WindowTransform.cpp b/src/Processors/Transforms/WindowTransform.cpp index 06ae2bfb25e..006593edeaa 100644 --- a/src/Processors/Transforms/WindowTransform.cpp +++ b/src/Processors/Transforms/WindowTransform.cpp @@ -16,6 +16,9 @@ #include #include #include +#include +#include +#include #include #include @@ -75,6 +78,8 @@ public: virtual std::optional getDefaultFrame() const { return {}; } + virtual ColumnPtr castColumn(const Columns &, const std::vector &) { return nullptr; } + /// Is the frame type supported by this function. virtual bool checkWindowFrameType(const WindowTransform * /*transform*/) const { return true; } }; @@ -1171,6 +1176,9 @@ void WindowTransform::appendChunk(Chunk & chunk) // Initialize output columns. 
for (auto & ws : workspaces) { + if (ws.window_function_impl) + block.casted_columns.push_back(ws.window_function_impl->castColumn(block.input_columns, ws.argument_column_indices)); + block.output_columns.push_back(ws.aggregate_function->getResultType() ->createColumn()); block.output_columns.back()->reserve(block.rows); @@ -2358,6 +2366,8 @@ public: template struct WindowFunctionLagLeadInFrame final : public WindowFunction { + FunctionBasePtr func_cast = nullptr; + WindowFunctionLagLeadInFrame(const std::string & name_, const DataTypes & argument_types_, const Array & parameters_) : WindowFunction(name_, argument_types_, parameters_, createResultType(argument_types_, name_)) @@ -2385,18 +2395,71 @@ struct WindowFunctionLagLeadInFrame final : public WindowFunction return; } - if (!argument_types[0]->equals(*argument_types[2])) - throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Argument type '{}' and the default value type '{}' are different", - argument_types[0]->getName(), - argument_types[2]->getName()); - if (argument_types.size() > 3) { throw Exception(ErrorCodes::BAD_ARGUMENTS, "Function '{}' accepts at most 3 arguments, {} given", name, argument_types.size()); } + + if (argument_types[0]->equals(*argument_types[2])) + return; + + const auto supertype = getLeastSupertype(DataTypes{argument_types[0], argument_types[2]}); + if (!supertype) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "There is no supertype for the argument type '{}' and the default value type '{}'", + argument_types[0]->getName(), + argument_types[2]->getName()); + } + if (!argument_types[0]->equals(*supertype)) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "The supertype '{}' for the argument type '{}' and the default value type '{}' is not the same as the argument type", + supertype->getName(), + argument_types[0]->getName(), + argument_types[2]->getName()); + } + + const auto from_name = argument_types[2]->getName(); + const auto to_name = argument_types[0]->getName(); + ColumnsWithTypeAndName arguments + { + { argument_types[2], "" }, + { + DataTypeString().createColumnConst(0, to_name), + std::make_shared(), + "" + } + }; + + auto get_cast_func = [&arguments] + { + FunctionOverloadResolverPtr func_builder_cast = createInternalCastOverloadResolver(CastType::accurate, {}); + return func_builder_cast->build(arguments); + }; + + func_cast = get_cast_func(); + + } + + ColumnPtr castColumn(const Columns & columns, const std::vector & idx) override + { + if (!func_cast) + return nullptr; + + ColumnsWithTypeAndName arguments + { + { columns[idx[2]], argument_types[2], "" }, + { + DataTypeString().createColumnConst(columns[idx[2]]->size(), argument_types[0]->getName()), + std::make_shared(), + "" + } + }; + + return func_cast->execute(arguments, argument_types[0], columns[idx[2]]->size()); } static DataTypePtr createResultType(const DataTypes & argument_types_, const std::string & name_) @@ -2446,12 +2509,11 @@ struct WindowFunctionLagLeadInFrame final : public WindowFunction if (argument_types.size() > 2) { // Column with default values is specified. - // The conversion through Field is inefficient, but we accept - // subtypes of the argument type as a default value (for convenience), - // and it's a pain to write conversion that respects ColumnNothing - // and ColumnConst and so on. - const IColumn & default_column = *current_block.input_columns[ - workspace.argument_column_indices[2]].get(); + const IColumn & default_column = + current_block.casted_columns[function_index] ? 
+ *current_block.casted_columns[function_index].get() : + *current_block.input_columns[workspace.argument_column_indices[2]].get(); + to.insert(default_column[transform->current_row.row]); } else diff --git a/src/Processors/Transforms/WindowTransform.h b/src/Processors/Transforms/WindowTransform.h index 43fa6b28019..fe4f79e997c 100644 --- a/src/Processors/Transforms/WindowTransform.h +++ b/src/Processors/Transforms/WindowTransform.h @@ -50,6 +50,7 @@ struct WindowTransformBlock { Columns original_input_columns; Columns input_columns; + Columns casted_columns; MutableColumns output_columns; size_t rows = 0; From 498ae4358647dbff5fde2861a7113a9c9597930a Mon Sep 17 00:00:00 2001 From: Alexander Gololobov Date: Fri, 26 Jul 2024 10:42:23 +0200 Subject: [PATCH 237/661] Make 02908_many_requests_to_system_replicas less stressful --- ...08_many_requests_to_system_replicas.reference | 14 +++++++------- .../02908_many_requests_to_system_replicas.sh | 16 ++++++++-------- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/tests/queries/0_stateless/02908_many_requests_to_system_replicas.reference b/tests/queries/0_stateless/02908_many_requests_to_system_replicas.reference index f376bb87044..fdefd2e3466 100644 --- a/tests/queries/0_stateless/02908_many_requests_to_system_replicas.reference +++ b/tests/queries/0_stateless/02908_many_requests_to_system_replicas.reference @@ -1,10 +1,10 @@ -Creating 300 tables -900 1 1 -900 1 1 -900 1 1 -900 1 1 -Making 200 requests to system.replicas +Creating 50 tables +150 1 1 +150 1 1 +150 1 1 +150 1 1 +Making 100 requests to system.replicas Query system.replicas while waiting for other concurrent requests to finish 0 -900 +150 1 diff --git a/tests/queries/0_stateless/02908_many_requests_to_system_replicas.sh b/tests/queries/0_stateless/02908_many_requests_to_system_replicas.sh index a247c99a818..81ba59fc591 100755 --- a/tests/queries/0_stateless/02908_many_requests_to_system_replicas.sh +++ b/tests/queries/0_stateless/02908_many_requests_to_system_replicas.sh @@ -7,8 +7,8 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) set -e -NUM_TABLES=300 -CONCURRENCY=200 +NUM_TABLES=50 +CONCURRENCY=100 echo "Creating $NUM_TABLES tables" @@ -46,10 +46,10 @@ wait; # Check results with different max_block_size -$CLICKHOUSE_CLIENT -q 'SELECT count(), sum(total_replicas) >= 2700, sum(active_replicas) >= 2700 FROM system.replicas WHERE database=currentDatabase()' -$CLICKHOUSE_CLIENT -q 'SELECT count(), sum(total_replicas) >= 2700, sum(active_replicas) >= 2700 FROM system.replicas WHERE database=currentDatabase() SETTINGS max_block_size=1' -$CLICKHOUSE_CLIENT -q 'SELECT count(), sum(total_replicas) >= 2700, sum(active_replicas) >= 2700 FROM system.replicas WHERE database=currentDatabase() SETTINGS max_block_size=77' -$CLICKHOUSE_CLIENT -q 'SELECT count(), sum(total_replicas) >= 2700, sum(active_replicas) >= 2700 FROM system.replicas WHERE database=currentDatabase() SETTINGS max_block_size=11111' +$CLICKHOUSE_CLIENT -q 'SELECT count() as c, sum(total_replicas) >= 3*c, sum(active_replicas) >= 3*c FROM system.replicas WHERE database=currentDatabase()' +$CLICKHOUSE_CLIENT -q 'SELECT count() as c, sum(total_replicas) >= 3*c, sum(active_replicas) >= 3*c FROM system.replicas WHERE database=currentDatabase() SETTINGS max_block_size=1' +$CLICKHOUSE_CLIENT -q 'SELECT count() as c, sum(total_replicas) >= 3*c, sum(active_replicas) >= 3*c FROM system.replicas WHERE database=currentDatabase() SETTINGS max_block_size=77' +$CLICKHOUSE_CLIENT -q 'SELECT count() as c, 
sum(total_replicas) >= 3*c, sum(active_replicas) >= 3*c FROM system.replicas WHERE database=currentDatabase() SETTINGS max_block_size=11111' echo "Making $CONCURRENCY requests to system.replicas" @@ -70,8 +70,8 @@ wait; $CLICKHOUSE_CLIENT -nq " SYSTEM FLUSH LOGS; --- without optimisation there are ~350K zk requests -SELECT sum(ProfileEvents['ZooKeeperTransactions']) < 30000 +-- Check that number of ZK request is less then a half of (total replicas * concurrency) +SELECT sum(ProfileEvents['ZooKeeperTransactions']) < (${NUM_TABLES} * 3 * ${CONCURRENCY} / 2) FROM system.query_log WHERE current_database=currentDatabase() AND log_comment='02908_many_requests'; " From 5bf89a433128985944cb5dd6ad6ef40a9658ff52 Mon Sep 17 00:00:00 2001 From: Pablo Marcos Date: Fri, 26 Jul 2024 08:42:52 +0000 Subject: [PATCH 238/661] Set a different instance dir when using pytest-xdist This allows executing in integration tests in parallel without directory clashes. --- tests/integration/helpers/cluster.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index 548b58a17e8..0c8278048bf 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -434,6 +434,11 @@ class ClickHouseCluster: # docker-compose removes everything non-alphanumeric from project names so we do it too. self.project_name = re.sub(r"[^a-z0-9]", "", project_name.lower()) self.instances_dir_name = get_instances_dir(self.name) + xdist_worker = os.getenv("PYTEST_XDIST_WORKER") + if xdist_worker: + self.project_name += f"_{xdist_worker}" + self.instances_dir_name += f"_{xdist_worker}" + self.instances_dir = p.join(self.base_dir, self.instances_dir_name) self.docker_logs_path = p.join(self.instances_dir, "docker.log") self.env_file = p.join(self.instances_dir, DEFAULT_ENV_NAME) From 0c5c23e78477636560cd09f17b91db79e420680f Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Fri, 26 Jul 2024 10:21:36 +0200 Subject: [PATCH 239/661] More fixes --- src/Common/Exception.cpp | 42 +++++++++++++++++++++++++++++---- src/Common/Exception.h | 31 ++++++++++++++++++------ src/Common/SignalHandlers.cpp | 2 +- src/Common/StackTrace.cpp | 2 +- src/Common/ThreadPool.cpp | 4 ++-- src/Loggers/OwnSplitChannel.cpp | 7 +++++- src/Loggers/OwnSplitChannel.h | 2 ++ 7 files changed, 73 insertions(+), 17 deletions(-) diff --git a/src/Common/Exception.cpp b/src/Common/Exception.cpp index 33befa64946..c4bd4fbd943 100644 --- a/src/Common/Exception.cpp +++ b/src/Common/Exception.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -100,7 +101,7 @@ Exception::Exception(const MessageMasked & msg_masked, int code, bool remote_) { if (terminate_on_any_exception) std::_Exit(terminate_status_code); - capture_thread_frame_pointers = thread_frame_pointers; + capture_thread_frame_pointers = getThreadFramePointers(); handle_error_code(msg_masked.msg, code, remote, getStackFramePointers()); } @@ -110,7 +111,7 @@ Exception::Exception(MessageMasked && msg_masked, int code, bool remote_) { if (terminate_on_any_exception) std::_Exit(terminate_status_code); - capture_thread_frame_pointers = thread_frame_pointers; + capture_thread_frame_pointers = getThreadFramePointers(); handle_error_code(message(), code, remote, getStackFramePointers()); } @@ -119,7 +120,7 @@ Exception::Exception(CreateFromPocoTag, const Poco::Exception & exc) { if (terminate_on_any_exception) std::_Exit(terminate_status_code); - capture_thread_frame_pointers = 
thread_frame_pointers; + capture_thread_frame_pointers = getThreadFramePointers(); #ifdef STD_EXCEPTION_HAS_STACK_TRACE auto * stack_trace_frames = exc.get_stack_trace_frames(); auto stack_trace_size = exc.get_stack_trace_size(); @@ -133,7 +134,7 @@ Exception::Exception(CreateFromSTDTag, const std::exception & exc) { if (terminate_on_any_exception) std::_Exit(terminate_status_code); - capture_thread_frame_pointers = thread_frame_pointers; + capture_thread_frame_pointers = getThreadFramePointers(); #ifdef STD_EXCEPTION_HAS_STACK_TRACE auto * stack_trace_frames = exc.get_stack_trace_frames(); auto stack_trace_size = exc.get_stack_trace_size(); @@ -223,10 +224,38 @@ Exception::FramePointers Exception::getStackFramePointers() const } thread_local bool Exception::enable_job_stack_trace = false; -thread_local std::vector Exception::thread_frame_pointers = {}; +thread_local bool Exception::can_use_thread_frame_pointers = false; +thread_local Exception::ThreadFramePointers Exception::thread_frame_pointers; + +Exception::ThreadFramePointers::ThreadFramePointers() +{ + can_use_thread_frame_pointers = true; +} + +Exception::ThreadFramePointers::~ThreadFramePointers() +{ + can_use_thread_frame_pointers = false; +} + +Exception::ThreadFramePointersBase Exception::getThreadFramePointers() +{ + if (can_use_thread_frame_pointers) + return thread_frame_pointers.frame_pointers; + + return {}; +} + +void Exception::setThreadFramePointers(ThreadFramePointersBase frame_pointers) +{ + if (can_use_thread_frame_pointers) + thread_frame_pointers.frame_pointers = std::move(frame_pointers); +} static void tryLogCurrentExceptionImpl(Poco::Logger * logger, const std::string & start_of_message) { + if (!OwnSplitChannel::isLoggingEnabled()) + return; + try { PreformattedMessage message = getCurrentExceptionMessageAndPattern(true); @@ -242,6 +271,9 @@ static void tryLogCurrentExceptionImpl(Poco::Logger * logger, const std::string void tryLogCurrentException(const char * log_name, const std::string & start_of_message) { + if (!OwnSplitChannel::isLoggingEnabled()) + return; + /// Under high memory pressure, new allocations throw a /// MEMORY_LIMIT_EXCEEDED exception. 
/// diff --git a/src/Common/Exception.h b/src/Common/Exception.h index 4e54c411bf1..a4f55f41caa 100644 --- a/src/Common/Exception.h +++ b/src/Common/Exception.h @@ -10,7 +10,6 @@ #include #include -#include #include #include @@ -49,14 +48,14 @@ public: { if (terminate_on_any_exception) std::terminate(); - capture_thread_frame_pointers = thread_frame_pointers; + capture_thread_frame_pointers = getThreadFramePointers(); } Exception(const PreformattedMessage & msg, int code): Exception(msg.text, code) { if (terminate_on_any_exception) std::terminate(); - capture_thread_frame_pointers = thread_frame_pointers; + capture_thread_frame_pointers = getThreadFramePointers(); message_format_string = msg.format_string; message_format_string_args = msg.format_string_args; } @@ -65,18 +64,36 @@ public: { if (terminate_on_any_exception) std::terminate(); - capture_thread_frame_pointers = thread_frame_pointers; + capture_thread_frame_pointers = getThreadFramePointers(); message_format_string = msg.format_string; message_format_string_args = msg.format_string_args; } /// Collect call stacks of all previous jobs' schedulings leading to this thread job's execution static thread_local bool enable_job_stack_trace; - static thread_local std::vector thread_frame_pointers; + static thread_local bool can_use_thread_frame_pointers; + /// Because of unknown order of static destructor calls, + /// thread_frame_pointers can already be uninitialized when a different destructor generates an exception. + /// To prevent such scenarios, a wrapper class is created and a function that will return empty vector + /// if its destructor is already called + using ThreadFramePointersBase = std::vector; + struct ThreadFramePointers + { + ThreadFramePointers(); + ~ThreadFramePointers(); + + ThreadFramePointersBase frame_pointers; + }; + + static ThreadFramePointersBase getThreadFramePointers(); + static void setThreadFramePointers(ThreadFramePointersBase frame_pointers); + /// Callback for any exception static std::function callback; protected: + static thread_local ThreadFramePointers thread_frame_pointers; + // used to remove the sensitive information from exceptions if query_masking_rules is configured struct MessageMasked { @@ -178,7 +195,7 @@ class ErrnoException : public Exception public: ErrnoException(std::string && msg, int code, int with_errno) : Exception(msg, code), saved_errno(with_errno) { - capture_thread_frame_pointers = thread_frame_pointers; + capture_thread_frame_pointers = getThreadFramePointers(); addMessage(", {}", errnoToString(saved_errno)); } @@ -187,7 +204,7 @@ public: requires std::is_convertible_v ErrnoException(int code, T && message) : Exception(message, code), saved_errno(errno) { - capture_thread_frame_pointers = thread_frame_pointers; + capture_thread_frame_pointers = getThreadFramePointers(); addMessage(", {}", errnoToString(saved_errno)); } diff --git a/src/Common/SignalHandlers.cpp b/src/Common/SignalHandlers.cpp index 52c83d80121..e025e49e0a3 100644 --- a/src/Common/SignalHandlers.cpp +++ b/src/Common/SignalHandlers.cpp @@ -89,7 +89,7 @@ void signalHandler(int sig, siginfo_t * info, void * context) writePODBinary(*info, out); writePODBinary(signal_context, out); writePODBinary(stack_trace, out); - writeVectorBinary(Exception::enable_job_stack_trace ? Exception::thread_frame_pointers : std::vector{}, out); + writeVectorBinary(Exception::enable_job_stack_trace ? 
Exception::getThreadFramePointers() : std::vector{}, out); writeBinary(static_cast(getThreadId()), out); writePODBinary(current_thread, out); diff --git a/src/Common/StackTrace.cpp b/src/Common/StackTrace.cpp index ff8765c9727..76277cbc993 100644 --- a/src/Common/StackTrace.cpp +++ b/src/Common/StackTrace.cpp @@ -489,7 +489,7 @@ struct CacheEntry using CacheEntryPtr = std::shared_ptr; -static constinit std::atomic can_use_cache = false; +static constinit bool can_use_cache = false; using StackTraceCacheBase = std::map>; diff --git a/src/Common/ThreadPool.cpp b/src/Common/ThreadPool.cpp index 01f561d573f..c8f1ae99969 100644 --- a/src/Common/ThreadPool.cpp +++ b/src/Common/ThreadPool.cpp @@ -51,7 +51,7 @@ public: if (!capture_frame_pointers) return; /// Save all previous jobs call stacks and append with current - frame_pointers = DB::Exception::thread_frame_pointers; + frame_pointers = DB::Exception::getThreadFramePointers(); frame_pointers.push_back(StackTrace().getFramePointers()); } @@ -455,7 +455,7 @@ void ThreadPoolImpl::worker(typename std::list::iterator thread_ try { if (DB::Exception::enable_job_stack_trace) - DB::Exception::thread_frame_pointers = std::move(job_data->frame_pointers); + DB::Exception::setThreadFramePointers(std::move(job_data->frame_pointers)); CurrentMetrics::Increment metric_active_pool_threads(metric_active_threads); diff --git a/src/Loggers/OwnSplitChannel.cpp b/src/Loggers/OwnSplitChannel.cpp index c0e8514c62a..e29d2a1e0aa 100644 --- a/src/Loggers/OwnSplitChannel.cpp +++ b/src/Loggers/OwnSplitChannel.cpp @@ -18,6 +18,11 @@ namespace DB static constinit std::atomic allow_logging{true}; +bool OwnSplitChannel::isLoggingEnabled() +{ + return allow_logging; +} + void OwnSplitChannel::disableLogging() { allow_logging = false; @@ -25,7 +30,7 @@ void OwnSplitChannel::disableLogging() void OwnSplitChannel::log(const Poco::Message & msg) { - if (!allow_logging) + if (!isLoggingEnabled()) return; #ifndef WITHOUT_TEXT_LOG diff --git a/src/Loggers/OwnSplitChannel.h b/src/Loggers/OwnSplitChannel.h index 9872a4fb558..9de55f330be 100644 --- a/src/Loggers/OwnSplitChannel.h +++ b/src/Loggers/OwnSplitChannel.h @@ -41,6 +41,8 @@ public: static void disableLogging(); + static bool isLoggingEnabled(); + private: void logSplit(const Poco::Message & msg); void tryLogSplit(const Poco::Message & msg); From c7330252cf581441b95c51b47977f597eb41734e Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Fri, 26 Jul 2024 11:47:49 +0300 Subject: [PATCH 240/661] Disable convert OUTER JOIN to INNER JOIN optimization for non ALL JOIN strictness --- .../QueryPlan/Optimizations/convertOuterJoinToInnerJoin.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/Processors/QueryPlan/Optimizations/convertOuterJoinToInnerJoin.cpp b/src/Processors/QueryPlan/Optimizations/convertOuterJoinToInnerJoin.cpp index d90f0e152e7..d9296f10a98 100644 --- a/src/Processors/QueryPlan/Optimizations/convertOuterJoinToInnerJoin.cpp +++ b/src/Processors/QueryPlan/Optimizations/convertOuterJoinToInnerJoin.cpp @@ -23,7 +23,10 @@ size_t tryConvertOuterJoinToInnerJoin(QueryPlan::Node * parent_node, QueryPlan:: return 0; const auto & table_join = join->getJoin()->getTableJoin(); - if (table_join.strictness() == JoinStrictness::Asof) + + /// Any JOIN issue https://github.com/ClickHouse/ClickHouse/issues/66447 + /// Anti JOIN issue https://github.com/ClickHouse/ClickHouse/issues/67156 + if (table_join.strictness() != JoinStrictness::All) return 0; /// TODO: Support join_use_nulls From 
aaa25454b31d854338200b335d7ac6442e959af4 Mon Sep 17 00:00:00 2001 From: Blargian Date: Fri, 26 Jul 2024 10:58:45 +0200 Subject: [PATCH 241/661] Additional formatting fixes --- .../functions/type-conversion-functions.md | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/docs/en/sql-reference/functions/type-conversion-functions.md b/docs/en/sql-reference/functions/type-conversion-functions.md index 4326753216e..87d824ec5bb 100644 --- a/docs/en/sql-reference/functions/type-conversion-functions.md +++ b/docs/en/sql-reference/functions/type-conversion-functions.md @@ -571,7 +571,7 @@ Result: ## toInt32OrZero -Like [`toInt32`](#toint32), this function converts an input value to a value of type [Int8](../data-types/int-uint.md) but returns `0` in case of an error. +Like [`toInt32`](#toint32), this function converts an input value to a value of type [Int32](../data-types/int-uint.md) but returns `0` in case of an error. **Syntax** @@ -767,7 +767,7 @@ SELECT toInt64('9223372036854775808') == --9223372036854775808; **Returned value** -- 64-bit integer value. [Int64](../data-types/int-uint.md). [Int64](../data-types/int-uint.md). +- 64-bit integer value. [Int64](../data-types/int-uint.md). :::note The function uses [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning it truncates fractional digits of numbers. @@ -800,7 +800,7 @@ Result: ## toInt64OrZero -Like [`toInt64`](#toint64), this function converts an input value to a value of type [Int8](../data-types/int-uint.md) but returns `0` in case of an error. +Like [`toInt64`](#toint64), this function converts an input value to a value of type [Int64](../data-types/int-uint.md) but returns `0` in case of an error. **Syntax** @@ -878,7 +878,7 @@ Types for which `\N` is returned: **Returned value** -- Integer value of type `Int64` if successful, otherwise `NULL`. [Int64](../data-types/int-uint.md) / [NULL](../data-types/nullable.md). +- 64-bit integer value if successful, otherwise `NULL`. [Int64](../data-types/int-uint.md) / [NULL](../data-types/nullable.md). :::note The function uses [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning it truncates fractional digits of numbers. @@ -935,7 +935,7 @@ Types for which the default value is returned: **Returned value** -- Integer value of type `Int64` if successful, otherwise returns the default value. [Int64](../data-types/int-uint.md). +- 64-bit integer value if successful, otherwise returns the default value. [Int64](../data-types/int-uint.md). :::note - The function uses [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning it truncates fractional digits of numbers. @@ -1028,7 +1028,7 @@ Result: ## toInt128OrZero -Like [`toInt128`](#toint128), this function converts an input value to a value of type [Int8](../data-types/int-uint.md) but returns `0` in case of an error. +Like [`toInt128`](#toint128), this function converts an input value to a value of type [Int128](../data-types/int-uint.md) but returns `0` in case of an error. **Syntax** @@ -1256,7 +1256,7 @@ Result: ## toInt256OrZero -Like [`toInt256`](#toint256), this function converts an input value to a value of type [Int8](../data-types/int-uint.md) but returns `0` in case of an error. +Like [`toInt256`](#toint256), this function converts an input value to a value of type [Int256](../data-types/int-uint.md) but returns `0` in case of an error. 
**Syntax** From 338685cc79a5358246977f2ba039230a615c6ea6 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Fri, 26 Jul 2024 10:59:17 +0200 Subject: [PATCH 242/661] Fix build --- programs/odbc-bridge/tests/CMakeLists.txt | 2 +- src/CMakeLists.txt | 1 + src/Common/mysqlxx/tests/CMakeLists.txt | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/programs/odbc-bridge/tests/CMakeLists.txt b/programs/odbc-bridge/tests/CMakeLists.txt index f1411dbb554..2f63aed7942 100644 --- a/programs/odbc-bridge/tests/CMakeLists.txt +++ b/programs/odbc-bridge/tests/CMakeLists.txt @@ -1,2 +1,2 @@ clickhouse_add_executable (validate-odbc-connection-string validate-odbc-connection-string.cpp ../validateODBCConnectionString.cpp) -target_link_libraries (validate-odbc-connection-string PRIVATE clickhouse_common_io clickhouse_common_config) +target_link_libraries (validate-odbc-connection-string PRIVATE clickhouse_common_io clickhouse_common_config loggers_no_text_log) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 0f84dd35320..fede7d69105 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -638,6 +638,7 @@ if (ENABLE_TESTS) dbms clickhouse_common_config clickhouse_common_zookeeper + loggers hilite_comparator) if (TARGET ch_contrib::simdjson) diff --git a/src/Common/mysqlxx/tests/CMakeLists.txt b/src/Common/mysqlxx/tests/CMakeLists.txt index f62908ddcaf..53bee778470 100644 --- a/src/Common/mysqlxx/tests/CMakeLists.txt +++ b/src/Common/mysqlxx/tests/CMakeLists.txt @@ -1,2 +1,2 @@ clickhouse_add_executable (mysqlxx_pool_test mysqlxx_pool_test.cpp) -target_link_libraries (mysqlxx_pool_test PRIVATE mysqlxx clickhouse_common_config) +target_link_libraries (mysqlxx_pool_test PRIVATE mysqlxx clickhouse_common_config loggers_no_text_log) From 8d13461fb74fc991b73382d04a9bc7a9fd3425fa Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 26 Jul 2024 09:03:37 +0000 Subject: [PATCH 243/661] Another fix. --- src/Storages/StorageDistributed.cpp | 7 ++----- src/Storages/StorageMerge.cpp | 8 ++++++++ 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index 9b417cda177..07892971ec2 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -847,11 +847,8 @@ void StorageDistributed::read( /** For distributed tables we do not need constants in header, since we don't send them to remote servers. * Moreover, constants can break some functions like `hostName` that are constants only for local queries. */ - if (processed_stage != QueryProcessingStage::Complete) - { - for (auto & column : header) - column.column = column.column->convertToFullColumnIfConst(); - } + for (auto & column : header) + column.column = column.column->convertToFullColumnIfConst(); modified_query_info.query = queryNodeToDistributedSelectQuery(query_tree_distributed); modified_query_info.query_tree = std::move(query_tree_distributed); diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index f5bc183931f..9962da3d6de 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -369,6 +369,14 @@ void StorageMerge::read( /// What will be result structure depending on query processed stage in source tables? 
Block common_header = getHeaderForProcessingStage(column_names, storage_snapshot, query_info, local_context, processed_stage);
 
+    if (local_context->getSettingsRef().allow_experimental_analyzer && processed_stage == QueryProcessingStage::Complete)
+    {
+        /// Remove constants.
+        /// For StorageDistributed, functions like `hostName` are constants only for local queries.
+        for (auto & column : common_header)
+            column.column = column.column->convertToFullColumnIfConst();
+    }
+
     auto step = std::make_unique(
         column_names,
         query_info,
From 02bfe82192fa4aa6ebb3e7b9192ec6f334fbfc56 Mon Sep 17 00:00:00 2001
From: Blargian
Date: Fri, 26 Jul 2024 11:19:46 +0200
Subject: [PATCH 244/661] rename filesystemFree to filesystemUnreserved

---
 docs/en/sql-reference/functions/other-functions.md | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/docs/en/sql-reference/functions/other-functions.md b/docs/en/sql-reference/functions/other-functions.md
index b7e4094f30e..79bffe00d01 100644
--- a/docs/en/sql-reference/functions/other-functions.md
+++ b/docs/en/sql-reference/functions/other-functions.md
@@ -2102,14 +2102,14 @@ Result:
 └─────────────────┘
 ```
 
-## filesystemFree
+## filesystemUnreserved
 
-Returns the total amount of the free space on the filesystem hosting the database persistence. See also `filesystemAvailable`
+Returns the total amount of free space on the filesystem hosting the database persistence (previously `filesystemFree`). See also [`filesystemAvailable`](#filesystemavailable).
 
 **Syntax**
 
 ```sql
-filesystemFree()
+filesystemUnreserved()
 ```
 
 **Returned value**
@@ -2121,7 +2121,7 @@ filesystemFree()
 Query:
 
 ```sql
-SELECT formatReadableSize(filesystemFree()) AS "Free space";
+SELECT formatReadableSize(filesystemUnreserved()) AS "Free space";
 ```
 
 Result:
From 434571d496a6ca6fc1b0038ead560572d0553ee5 Mon Sep 17 00:00:00 2001
From: Maksim Kita
Date: Fri, 26 Jul 2024 12:40:20 +0300
Subject: [PATCH 245/661] Added tests

---
 ...uter_join_to_inner_join_any_join.reference | 3 ++
 ...vert_outer_join_to_inner_join_any_join.sql | 33 ++++++++++++++
 ...ter_join_to_inner_join_anti_join.reference | 19 ++++++++
 ...ert_outer_join_to_inner_join_anti_join.sql | 45 +++++++++++++++++++
 4 files changed, 100 insertions(+)
 create mode 100644 tests/queries/0_stateless/03210_convert_outer_join_to_inner_join_any_join.reference
 create mode 100644 tests/queries/0_stateless/03210_convert_outer_join_to_inner_join_any_join.sql
 create mode 100644 tests/queries/0_stateless/03211_convert_outer_join_to_inner_join_anti_join.reference
 create mode 100644 tests/queries/0_stateless/03211_convert_outer_join_to_inner_join_anti_join.sql

diff --git a/tests/queries/0_stateless/03210_convert_outer_join_to_inner_join_any_join.reference b/tests/queries/0_stateless/03210_convert_outer_join_to_inner_join_any_join.reference
new file mode 100644
index 00000000000..3d6a23045fb
--- /dev/null
+++ b/tests/queries/0_stateless/03210_convert_outer_join_to_inner_join_any_join.reference
@@ -0,0 +1,3 @@
+1 tx1 US
+1 tx2 US
+1 tx3 US
diff --git a/tests/queries/0_stateless/03210_convert_outer_join_to_inner_join_any_join.sql b/tests/queries/0_stateless/03210_convert_outer_join_to_inner_join_any_join.sql
new file mode 100644
index 00000000000..599875e90cf
--- /dev/null
+++ b/tests/queries/0_stateless/03210_convert_outer_join_to_inner_join_any_join.sql
@@ -0,0 +1,33 @@
+DROP TABLE IF EXISTS user_country;
+DROP TABLE IF EXISTS user_transactions;
+
+CREATE TABLE user_country (
+    user_id UInt64,
+    country String
+)
+ENGINE = 
ReplacingMergeTree +ORDER BY user_id; + +CREATE TABLE user_transactions ( + user_id UInt64, + transaction_id String +) +ENGINE = MergeTree +ORDER BY user_id; + +INSERT INTO user_country (user_id, country) VALUES (1, 'US'); +INSERT INTO user_transactions (user_id, transaction_id) VALUES (1, 'tx1'), (1, 'tx2'), (1, 'tx3'), (2, 'tx1'); + +-- Expected 3 rows, got only 1. Removing 'ANY' and adding 'FINAL' fixes +-- the issue (but it is not always possible). Moving filter by 'country' to +-- an outer query doesn't help. Query without filter by 'country' works +-- as expected (returns 3 rows). +SELECT * FROM user_transactions +ANY LEFT JOIN user_country USING (user_id) +WHERE + user_id = 1 + AND country = 'US' +ORDER BY ALL; + +DROP TABLE user_country; +DROP TABLE user_transactions; diff --git a/tests/queries/0_stateless/03211_convert_outer_join_to_inner_join_anti_join.reference b/tests/queries/0_stateless/03211_convert_outer_join_to_inner_join_anti_join.reference new file mode 100644 index 00000000000..d717a29ab23 --- /dev/null +++ b/tests/queries/0_stateless/03211_convert_outer_join_to_inner_join_anti_join.reference @@ -0,0 +1,19 @@ +DATA + ┏━━━━━━━━━━━┳━━━━━━━━━━━┳━━━━┓ + ┃ c0 ┃ c1 ┃ c2 ┃ + ┡━━━━━━━━━━━╇━━━━━━━━━━━╇━━━━┩ +1. │ 826636805 │ 0 │ │ + ├───────────┼───────────┼────┤ +2. │ 0 │ 150808457 │ │ + └───────────┴───────────┴────┘ +NUMBER OF ROWS IN FIRST SHOULD BE EQUAL TO SECOND +FISRT + +SECOND +1 +TO DEBUG I TOOK JUST A SUBQUERY AND IT HAS 1 ROW +THIRD +1 +AND I ADDED SINGLE CONDITION THAT CONDITION <>0 THAT IS 1 IN THIRD QUERY AND IT HAS NO RESULT!!! +FOURTH +1 diff --git a/tests/queries/0_stateless/03211_convert_outer_join_to_inner_join_anti_join.sql b/tests/queries/0_stateless/03211_convert_outer_join_to_inner_join_anti_join.sql new file mode 100644 index 00000000000..77b1d52dd18 --- /dev/null +++ b/tests/queries/0_stateless/03211_convert_outer_join_to_inner_join_anti_join.sql @@ -0,0 +1,45 @@ +DROP TABLE IF EXISTS t0; + +CREATE TABLE t0 (c0 Int32, c1 Int32, c2 String) ENGINE = Log() ; +INSERT INTO t0(c0, c1, c2) VALUES (826636805,0, ''), (0, 150808457, ''); + +SELECT 'DATA'; +SELECT * FROM t0 FORMAT PrettyMonoBlock; + +SELECT 'NUMBER OF ROWS IN FIRST SHOULD BE EQUAL TO SECOND'; + + +SELECT 'FISRT'; +SELECT left.c2 FROM t0 AS left +LEFT ANTI JOIN t0 AS right_0 ON ((left.c0)=(right_0.c1)) +WHERE (abs ((- ((sign (right_0.c1)))))); + +SELECT 'SECOND'; +SELECT SUM(check <> 0) +FROM +( + SELECT (abs ((- ((sign (right_0.c1)))))) AS `check` + FROM t0 AS left + LEFT ANTI JOIN t0 AS right_0 ON ((left.c0)=(right_0.c1)) +); + + +SELECT 'TO DEBUG I TOOK JUST A SUBQUERY AND IT HAS 1 ROW'; + +SELECT 'THIRD'; + +SELECT (abs ((- ((sign (right_0.c1)))))) AS `check` +FROM t0 AS left +LEFT ANTI JOIN t0 AS right_0 ON ((left.c0)=(right_0.c1)); + + +SELECT 'AND I ADDED SINGLE CONDITION THAT CONDITION <>0 THAT IS 1 IN THIRD QUERY AND IT HAS NO RESULT!!!'; + + +SELECT 'FOURTH'; +SELECT (abs ((- ((sign (right_0.c1)))))) AS `check` +FROM t0 AS left +LEFT ANTI JOIN t0 AS right_0 ON ((left.c0)=(right_0.c1)) +WHERE check <> 0; + +DROP TABLE t0; From b3828b038dbcc9c5cf71b99d58f06497c2af3bd6 Mon Sep 17 00:00:00 2001 From: Shaun Struwig <41984034+Blargian@users.noreply.github.com> Date: Fri, 26 Jul 2024 11:49:04 +0200 Subject: [PATCH 246/661] add `filesystemUnreserved` --- utils/check-style/aspell-ignore/en/aspell-dict.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index 8e4e4fafe29..1a324b98ff4 100644 --- 
a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -1622,6 +1622,7 @@ filesystem filesystemAvailable filesystemCapacity filesystemFree +filesystemUnreserved filesystems finalizeAggregation fips From ca9bf2c67c8ac16d4fd18f2def6e4d3dfea62971 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 26 Jul 2024 11:53:48 +0200 Subject: [PATCH 247/661] Fix tidy --- src/Common/ZooKeeper/ZooKeeper.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Common/ZooKeeper/ZooKeeper.cpp b/src/Common/ZooKeeper/ZooKeeper.cpp index 1250e1273b9..7448d73cbbc 100644 --- a/src/Common/ZooKeeper/ZooKeeper.cpp +++ b/src/Common/ZooKeeper/ZooKeeper.cpp @@ -136,7 +136,7 @@ void ZooKeeper::init(ZooKeeperArgs args_, std::unique_ptr LOG_TRACE(log, "Initialized, hosts: {}, chroot: {}", fmt::join(args.hosts, ","), args.chroot); /// If the balancing strategy has an optimal node then it will be the first in the list - bool connected_to_suboptimal_node = node_idx && *node_idx != shuffled_hosts[0].original_index; + bool connected_to_suboptimal_node = node_idx && static_cast(*node_idx) != shuffled_hosts[0].original_index; bool respect_az = args.prefer_local_availability_zone && !args.client_availability_zone.empty(); bool may_benefit_from_reconnecting = respect_az || args.get_priority_load_balancing.hasOptimalNode(); if (connected_to_suboptimal_node && may_benefit_from_reconnecting) From 2519f9ed4252020c6a9fb21ef1410c87f4053200 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Fri, 26 Jul 2024 12:08:16 +0200 Subject: [PATCH 248/661] Only support archs --- contrib/numactl-cmake/CMakeLists.txt | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/contrib/numactl-cmake/CMakeLists.txt b/contrib/numactl-cmake/CMakeLists.txt index 5d086366c7f..a72ff11e485 100644 --- a/contrib/numactl-cmake/CMakeLists.txt +++ b/contrib/numactl-cmake/CMakeLists.txt @@ -1,4 +1,14 @@ -option (ENABLE_NUMACTL "Enable numactl" ${ENABLE_LIBRARIES}) +if (NOT ( + OS_LINUX AND (ARCH_AMD64 OR ARCH_AARCH64 OR ARCH_LOONGARCH64)) +) + if (ENABLE_NUMACTL) + message (${RECONFIGURE_MESSAGE_LEVEL} + "numactl is disabled implicitly because the OS or architecture is not supported. 
Use -DENABLE_NUMACTL=0") + endif () + set (ENABLE_NUMACTL OFF) +else() + option (ENABLE_NUMACTL "Enable numactl" ${ENABLE_LIBRARIES}) +endif() if (NOT ENABLE_NUMACTL) message (STATUS "Not using numactl") From 72ebff825c0752e66cefa4f367ce43ff23d77703 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Fri, 26 Jul 2024 12:19:09 +0200 Subject: [PATCH 249/661] Reduce max time of 00763_long_lock_buffer_alter_destination_table --- ...ong_lock_buffer_alter_destination_table.sh | 33 +++++++++++++++---- 1 file changed, 27 insertions(+), 6 deletions(-) diff --git a/tests/queries/0_stateless/00763_long_lock_buffer_alter_destination_table.sh b/tests/queries/0_stateless/00763_long_lock_buffer_alter_destination_table.sh index 7e2384cfc52..c12b4426740 100755 --- a/tests/queries/0_stateless/00763_long_lock_buffer_alter_destination_table.sh +++ b/tests/queries/0_stateless/00763_long_lock_buffer_alter_destination_table.sh @@ -16,18 +16,39 @@ ${CLICKHOUSE_CLIENT} --query="CREATE TABLE buffer_00763_1 (s String) ENGINE = Bu ${CLICKHOUSE_CLIENT} --query="CREATE TABLE mt_00763_1 (x UInt32, s String) ENGINE = MergeTree ORDER BY x" ${CLICKHOUSE_CLIENT} --query="INSERT INTO mt_00763_1 VALUES (1, '1'), (2, '2'), (3, '3')" -function thread1() +function thread_alter() { - seq 1 300 | sed -r -e 's/.+/ALTER TABLE mt_00763_1 MODIFY column s UInt32; ALTER TABLE mt_00763_1 MODIFY column s String;/' | ${CLICKHOUSE_CLIENT} --multiquery --ignore-error ||: + local TIMELIMIT=$((SECONDS+$1)) + local it=0 + while [ $SECONDS -lt "$TIMELIMIT" ] && [ $it -lt 300 ]; + do + it=$((it+1)) + $CLICKHOUSE_CLIENT --multiquery --ignore-error -q " + ALTER TABLE mt_00763_1 MODIFY column s UInt32; + ALTER TABLE mt_00763_1 MODIFY column s String; + " ||: + done } -function thread2() +function thread_query() { - seq 1 2000 | sed -r -e 's/.+/SELECT sum(length(s)) FROM buffer_00763_1;/' | ${CLICKHOUSE_CLIENT} --multiquery --ignore-error 2>&1 | grep -vP '(^3$|^Received exception from server|^Code: 473)' + local TIMELIMIT=$((SECONDS+$1)) + local it=0 + while [ $SECONDS -lt "$TIMELIMIT" ] && [ $it -lt 2000 ]; + do + it=$((it+1)) + $CLICKHOUSE_CLIENT --multiquery --ignore-error -q " + SELECT sum(length(s)) FROM buffer_00763_1; + " 2>&1 | grep -vP '(^3$|^Received exception from server|^Code: 473)' + done } -thread1 & -thread2 & +export -f thread_alter +export -f thread_query + +TIMEOUT=30 +thread_alter $TIMEOUT & +thread_query $TIMEOUT & wait From 98418120cd3167983b5436834d7c568cb42865af Mon Sep 17 00:00:00 2001 From: Pablo Marcos Date: Fri, 26 Jul 2024 10:21:43 +0000 Subject: [PATCH 250/661] Add parallel integration test execution to doc --- tests/integration/README.md | 67 ++++++++++++++++++++++++++++++++++--- 1 file changed, 62 insertions(+), 5 deletions(-) diff --git a/tests/integration/README.md b/tests/integration/README.md index cde4cb05aec..5d4fa407e3f 100644 --- a/tests/integration/README.md +++ b/tests/integration/README.md @@ -16,7 +16,7 @@ Don't use Docker from your system repository. * [py.test](https://docs.pytest.org/) testing framework. To install: `sudo -H pip install pytest` * [docker-compose](https://docs.docker.com/compose/) and additional python libraries. To install: -``` +```bash sudo -H pip install \ PyMySQL \ avro \ @@ -78,7 +78,7 @@ Notes: * Some tests maybe require a lot of resources (CPU, RAM, etc.). Better not try large tests like `test_distributed_ddl*` on your laptop. 
You can run tests via `./runner` script and pass pytest arguments as last arg: -``` +```bash $ ./runner --binary $HOME/ClickHouse/programs/clickhouse --odbc-bridge-binary $HOME/ClickHouse/programs/clickhouse-odbc-bridge --base-configs-dir $HOME/ClickHouse/programs/server/ 'test_ssl_cert_authentication -ss' Start tests ====================================================================================================== test session starts ====================================================================================================== @@ -102,7 +102,7 @@ test_ssl_cert_authentication/test.py::test_create_user PASSED ``` Path to binary and configs maybe specified via env variables: -``` +```bash $ export CLICKHOUSE_TESTS_BASE_CONFIG_DIR=$HOME/ClickHouse/programs/server/ $ export CLICKHOUSE_TESTS_SERVER_BIN_PATH=$HOME/ClickHouse/programs/clickhouse $ export CLICKHOUSE_TESTS_ODBC_BRIDGE_BIN_PATH=$HOME/ClickHouse/programs/clickhouse-odbc-bridge @@ -121,6 +121,63 @@ test_odbc_interaction/test.py ...... [100%] You can just open shell inside a container by overwritting the command: ./runner --command=bash +### Parallel test execution + +On the CI, we run a number of parallel runners (5 at the time of this writing), each on its own +Docker container. These runner containers spawn more containers for the services needed such as +ZooKeeper, MySQL, PostgreSQL and minio, among others. Within each runner, tests are parallelized +using [pytest-xdist](https://pytest-xdist.readthedocs.io/en/stable/). We're using `--dist=loadfile` +to [distribute the load](https://pytest-xdist.readthedocs.io/en/stable/distribution.html). In other +words: tests are grouped by module for test functions and by class for test methods. This means that +any test within the same module (or any class) will never execute their tests in parallel. They'll +be executed on the same worker one after the other. + +If the test supports parallel and repeated execution, you can run a bunch of them in parallel to +look for flakiness. We use [pytest-repeat](https://pypi.org/project/pytest-repeat/) to set the +number of times we want to execute a test through the `--count` argument. Then, `-n` sets the number +of parallel workers for `pytest-xdist`. 
+ +```bash +$ export CLICKHOUSE_TESTS_BASE_CONFIG_DIR=$HOME/ClickHouse/programs/server/ +$ export CLICKHOUSE_TESTS_SERVER_BIN_PATH=$HOME/ClickHouse/programs/clickhouse +$ export CLICKHOUSE_TESTS_ODBC_BRIDGE_BIN_PATH=$HOME/ClickHouse/programs/clickhouse-odbc-bridge +$ ./runner 'test_storage_s3_queue/test.py::test_max_set_age -- --count 10 -n 5' +Start tests +=============================================================================== test session starts ================================================================================ +platform linux -- Python 3.10.12, pytest-7.4.4, pluggy-1.5.0 -- /usr/bin/python3 +cachedir: .pytest_cache +rootdir: /ClickHouse/tests/integration +configfile: pytest.ini +plugins: reportlog-0.4.0, xdist-3.5.0, random-0.2, repeat-0.9.3, order-1.0.0, timeout-2.2.0 +timeout: 900.0s +timeout method: signal +timeout func_only: False +5 workers [10 items] +scheduling tests via LoadScheduling + +test_storage_s3_queue/test.py::test_max_set_age[9-10] +test_storage_s3_queue/test.py::test_max_set_age[7-10] +test_storage_s3_queue/test.py::test_max_set_age[5-10] +test_storage_s3_queue/test.py::test_max_set_age[1-10] +test_storage_s3_queue/test.py::test_max_set_age[3-10] +[gw3] [ 10%] PASSED test_storage_s3_queue/test.py::test_max_set_age[7-10] +test_storage_s3_queue/test.py::test_max_set_age[8-10] +[gw4] [ 20%] PASSED test_storage_s3_queue/test.py::test_max_set_age[9-10] +test_storage_s3_queue/test.py::test_max_set_age[10-10] +[gw0] [ 30%] PASSED test_storage_s3_queue/test.py::test_max_set_age[1-10] +test_storage_s3_queue/test.py::test_max_set_age[2-10] +[gw1] [ 40%] PASSED test_storage_s3_queue/test.py::test_max_set_age[3-10] +test_storage_s3_queue/test.py::test_max_set_age[4-10] +[gw2] [ 50%] PASSED test_storage_s3_queue/test.py::test_max_set_age[5-10] +test_storage_s3_queue/test.py::test_max_set_age[6-10] +[gw3] [ 60%] PASSED test_storage_s3_queue/test.py::test_max_set_age[8-10] +[gw4] [ 70%] PASSED test_storage_s3_queue/test.py::test_max_set_age[10-10] +[gw0] [ 80%] PASSED test_storage_s3_queue/test.py::test_max_set_age[2-10] +[gw1] [ 90%] PASSED test_storage_s3_queue/test.py::test_max_set_age[4-10] +[gw2] [100%] PASSED test_storage_s3_queue/test.py::test_max_set_age[6-10] +========================================================================== 10 passed in 120.65s (0:02:00) ========================================================================== +``` + ### Rebuilding the docker containers The main container used for integration tests lives in `docker/test/integration/base/Dockerfile`. Rebuild it with @@ -149,7 +206,7 @@ will automagically detect the types of variables and only the small diff of two If tests failing for mysterious reasons, this may help: -``` +```bash sudo service docker stop sudo bash -c 'rm -rf /var/lib/docker/*' sudo service docker start @@ -159,6 +216,6 @@ sudo service docker start On Ubuntu 20.10 and later in host network mode (default) one may encounter problem with nested containers not seeing each other. It happens because legacy and nftables rules are out of sync. 
Problem can be solved by: -``` +```bash sudo iptables -P FORWARD ACCEPT ``` From 7f80dab6927316f5c6c56e51ba439d01161f7567 Mon Sep 17 00:00:00 2001 From: Nikita Fomichev Date: Fri, 26 Jul 2024 12:34:36 +0200 Subject: [PATCH 251/661] CI push --- docker/test/util/process_functional_tests_result.py | 1 - 1 file changed, 1 deletion(-) diff --git a/docker/test/util/process_functional_tests_result.py b/docker/test/util/process_functional_tests_result.py index dbe50eeade0..3da1a8f3674 100755 --- a/docker/test/util/process_functional_tests_result.py +++ b/docker/test/util/process_functional_tests_result.py @@ -180,7 +180,6 @@ def process_result(result_path, broken_tests): for result in test_results: if result[1] == "FAIL": result[1] = "SERVER_DIED" - test_results.append(["Server died", "FAIL", "0", ""]) elif not success_finish: description = "Tests are not finished, " From 60cca77c8a415142fe6f181b25aaed84232ea3c0 Mon Sep 17 00:00:00 2001 From: Blargian Date: Fri, 26 Jul 2024 12:39:35 +0200 Subject: [PATCH 252/661] add example for materialize function --- .../functions/other-functions.md | 32 ++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/docs/en/sql-reference/functions/other-functions.md b/docs/en/sql-reference/functions/other-functions.md index b7e4094f30e..797607e552a 100644 --- a/docs/en/sql-reference/functions/other-functions.md +++ b/docs/en/sql-reference/functions/other-functions.md @@ -346,7 +346,9 @@ Result: ## materialize Turns a constant into a full column containing a single value. -Full columns and constants are represented differently in memory. Functions usually execute different code for normal and constant arguments, although the result should typically be the same. This function can be used to debug this behavior. +Full columns and constants are represented differently in memory. +Functions usually execute different code for normal and constant arguments, although the result should typically be the same. +This function can be used to debug this behavior. **Syntax** @@ -354,6 +356,34 @@ Full columns and constants are represented differently in memory. Functions usua materialize(x) ``` +**Parameters** + +- `x` — A constant. [Constant](../functions/index.md/#constants). + +**Returned value** + +- A column containing a single value `x`. + +**Example** + +In the example below the `countMatches` function expects a constant second argument. +This behaviour can be debugged by using the `materialize` function to turn a constant into a full column, +verifying that the function throws an error for a non-constant argument. + +Query: + +```sql +SELECT countMatches('foobarfoo', 'foo'); +SELECT countMatches('foobarfoo', materialize('foo')); +``` + +Result: + +```response +2 +Code: 44. DB::Exception: Received from localhost:9000. DB::Exception: Illegal type of argument #2 'pattern' of function countMatches, expected constant String, got String +``` + ## ignore Accepts any arguments, including `NULL` and does nothing. Always returns 0. 
From 1ebafccc13ea69ba06e2450014fd15d39facdcaa Mon Sep 17 00:00:00 2001
From: Shaun Struwig <41984034+Blargian@users.noreply.github.com>
Date: Fri, 26 Jul 2024 12:42:07 +0200
Subject: [PATCH 253/661] add `joinGetOrNull`

---
 utils/check-style/aspell-ignore/en/aspell-dict.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt
index 7de065cc589..182e1d2cb33 100644
--- a/utils/check-style/aspell-ignore/en/aspell-dict.txt
+++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt
@@ -1862,6 +1862,7 @@ jdbc
 jemalloc
 jeprof
 joinGet
+joinGetOrNull
 json
 jsonMergePatch
 jsonasstring
From 3f1dbdfce978bab2b2ce2aedecdbb5afbf54c4a0 Mon Sep 17 00:00:00 2001
From: Pablo Marcos
Date: Fri, 26 Jul 2024 11:01:10 +0000
Subject: [PATCH 254/661] Clarify documentation

---
 tests/integration/README.md | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tests/integration/README.md b/tests/integration/README.md
index 5d4fa407e3f..ab984b7bd04 100644
--- a/tests/integration/README.md
+++ b/tests/integration/README.md
@@ -127,10 +127,10 @@ On the CI, we run a number of parallel runners (5 at the time of this writing), each on its own
 Docker container. These runner containers spawn more containers for the services needed such as
 ZooKeeper, MySQL, PostgreSQL and minio, among others. Within each runner, tests are parallelized
 using [pytest-xdist](https://pytest-xdist.readthedocs.io/en/stable/). We're using `--dist=loadfile`
-to [distribute the load](https://pytest-xdist.readthedocs.io/en/stable/distribution.html). In other
-words: tests are grouped by module for test functions and by class for test methods. This means that
-any test within the same module (or any class) will never execute their tests in parallel. They'll
-be executed on the same worker one after the other.
+to [distribute the load](https://pytest-xdist.readthedocs.io/en/stable/distribution.html). In the
+documentation's words: this guarantees that all tests in a file run in the same worker. This means
+that no two tests within the same file ever execute in parallel; they are executed on the same
+worker one after the other.
 
 If the test supports parallel and repeated execution, you can run a bunch of them in parallel to
 look for flakiness. 
We use [pytest-repeat](https://pypi.org/project/pytest-repeat/) to set the From 1225d50508ad0885dca3367b08c15f54c65b02f6 Mon Sep 17 00:00:00 2001 From: serxa Date: Fri, 26 Jul 2024 11:09:48 +0000 Subject: [PATCH 255/661] Do not count AttachedTable for tables in information schema databases --- src/Databases/DatabasesCommon.cpp | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/Databases/DatabasesCommon.cpp b/src/Databases/DatabasesCommon.cpp index d2926c64f29..b8e9231f5c6 100644 --- a/src/Databases/DatabasesCommon.cpp +++ b/src/Databases/DatabasesCommon.cpp @@ -289,9 +289,7 @@ StoragePtr DatabaseWithOwnTablesBase::detachTableUnlocked(const String & table_n tables.erase(it); table_storage->is_detached = true; - if (!table_storage->isSystemStorage() - && database_name != DatabaseCatalog::SYSTEM_DATABASE - && database_name != DatabaseCatalog::TEMPORARY_DATABASE) + if (!table_storage->isSystemStorage() && !DatabaseCatalog::isPredefinedDatabase(database_name)) { LOG_TEST(log, "Counting detached table {} to database {}", table_name, database_name); CurrentMetrics::sub(getAttachedCounterForStorage(table_storage)); @@ -339,9 +337,7 @@ void DatabaseWithOwnTablesBase::attachTableUnlocked(const String & table_name, c /// non-Atomic database the is_detached is set to true before RENAME. table->is_detached = false; - if (!table->isSystemStorage() - && database_name != DatabaseCatalog::SYSTEM_DATABASE - && database_name != DatabaseCatalog::TEMPORARY_DATABASE) + if (!table->isSystemStorage() && !DatabaseCatalog::isPredefinedDatabase(database_name)) { LOG_TEST(log, "Counting attached table {} to database {}", table_name, database_name); CurrentMetrics::add(getAttachedCounterForStorage(table)); From 0cf0437196dfe4ee0f489ecc040b71e42e1f1a22 Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Thu, 25 Jul 2024 16:36:32 +0200 Subject: [PATCH 256/661] Use separate client context in clickhouse-local --- programs/client/Client.cpp | 29 ++++----- programs/client/Client.h | 1 - programs/local/LocalServer.cpp | 28 +++++--- programs/local/LocalServer.h | 4 +- src/Client/ClientBase.cpp | 113 ++++++++++++++++++--------------- src/Client/ClientBase.h | 6 ++ 6 files changed, 102 insertions(+), 79 deletions(-) diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index 887c5cb86bc..f2919db0308 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -209,8 +209,8 @@ std::vector Client::loadWarningMessages() {} /* query_parameters */, "" /* query_id */, QueryProcessingStage::Complete, - &global_context->getSettingsRef(), - &global_context->getClientInfo(), false, {}); + &client_context->getSettingsRef(), + &client_context->getClientInfo(), false, {}); while (true) { Packet packet = connection->receivePacket(); @@ -306,9 +306,6 @@ void Client::initialize(Poco::Util::Application & self) if (env_password && !config().has("password")) config().setString("password", env_password); - // global_context->setApplicationType(Context::ApplicationType::CLIENT); - global_context->setQueryParameters(query_parameters); - /// settings and limits could be specified in config file, but passed settings has higher priority for (const auto & setting : global_context->getSettingsRef().allUnchanged()) { @@ -382,7 +379,7 @@ try showWarnings(); /// Set user password complexity rules - auto & access_control = global_context->getAccessControl(); + auto & access_control = client_context->getAccessControl(); access_control.setPasswordComplexityRules(connection->getPasswordComplexityRules()); 
if (is_interactive && !delayed_interactive) @@ -459,7 +456,7 @@ void Client::connect() << connection_parameters.host << ":" << connection_parameters.port << (!connection_parameters.user.empty() ? " as user " + connection_parameters.user : "") << "." << std::endl; - connection = Connection::createConnection(connection_parameters, global_context); + connection = Connection::createConnection(connection_parameters, client_context); if (max_client_network_bandwidth) { @@ -528,7 +525,7 @@ void Client::connect() } } - if (!global_context->getSettingsRef().use_client_time_zone) + if (!client_context->getSettingsRef().use_client_time_zone) { const auto & time_zone = connection->getServerTimezone(connection_parameters.timeouts); if (!time_zone.empty()) @@ -611,7 +608,7 @@ void Client::printChangedSettings() const } }; - print_changes(global_context->getSettingsRef().changes(), "settings"); + print_changes(client_context->getSettingsRef().changes(), "settings"); print_changes(cmd_merge_tree_settings.changes(), "MergeTree settings"); } @@ -709,7 +706,7 @@ bool Client::processWithFuzzing(const String & full_query) { const char * begin = full_query.data(); orig_ast = parseQuery(begin, begin + full_query.size(), - global_context->getSettingsRef(), + client_context->getSettingsRef(), /*allow_multi_statements=*/ true); } catch (const Exception & e) @@ -733,7 +730,7 @@ bool Client::processWithFuzzing(const String & full_query) } // Kusto is not a subject for fuzzing (yet) - if (global_context->getSettingsRef().dialect == DB::Dialect::kusto) + if (client_context->getSettingsRef().dialect == DB::Dialect::kusto) { return true; } @@ -1072,6 +1069,11 @@ void Client::processOptions(const OptionsDescription & options_description, global_context->makeGlobalContext(); global_context->setApplicationType(Context::ApplicationType::CLIENT); + /// In case of clickhouse-client the `client_context` can be just an alias for the `global_context`. + /// (There is no need to copy the context because clickhouse-client has no background tasks so it won't use that context in parallel.) + client_context = global_context; + initClientContext(); + global_context->setSettings(cmd_settings); /// Copy settings-related program options to config. 
@@ -1205,11 +1207,6 @@ void Client::processConfig() pager = config().getString("pager", ""); setDefaultFormatsAndCompressionFromConfiguration(); - - global_context->setClientName(std::string(DEFAULT_CLIENT_NAME)); - global_context->setQueryKindInitial(); - global_context->setQuotaClientKey(config().getString("quota_key", "")); - global_context->setQueryKind(query_kind); } diff --git a/programs/client/Client.h b/programs/client/Client.h index 6d57a6ea648..ff71b36dbf3 100644 --- a/programs/client/Client.h +++ b/programs/client/Client.h @@ -19,7 +19,6 @@ public: int main(const std::vector & /*args*/) override; protected: - Poco::Util::LayeredConfiguration & getClientConfiguration() override; bool processWithFuzzing(const String & full_query) override; diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index 48e0cca7b73..e60c8ef6085 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -295,6 +295,8 @@ void LocalServer::cleanup() if (suggest) suggest.reset(); + client_context.reset(); + if (global_context) { global_context->shutdown(); @@ -436,7 +438,7 @@ void LocalServer::connect() in = input.get(); } connection = LocalConnection::createConnection( - connection_parameters, global_context, in, need_render_progress, need_render_profile_events, server_display_name); + connection_parameters, client_context, in, need_render_progress, need_render_profile_events, server_display_name); } @@ -497,8 +499,6 @@ try initTTYBuffer(toProgressOption(getClientConfiguration().getString("progress", "default"))); ASTAlterCommand::setFormatAlterCommandsWithParentheses(true); - applyCmdSettings(global_context); - /// try to load user defined executable functions, throw on error and die try { @@ -510,6 +510,11 @@ try throw; } + /// Must be called after we stopped initializing the global context and changing its settings. + /// After this point the global context must be stayed almost unchanged till shutdown, + /// and all necessary changes must be made to the client context instead. + createClientContext(); + if (is_interactive) { clearTerminal(); @@ -730,11 +735,12 @@ void LocalServer::processConfig() /// there is separate context for Buffer tables). adjustSettings(); applySettingsOverridesForLocal(global_context); - applyCmdOptions(global_context); /// Load global settings from default_profile and system_profile. global_context->setDefaultProfiles(getClientConfiguration()); + applyCmdOptions(global_context); + /// We load temporary database first, because projections need it. DatabaseCatalog::instance().initializeAndLoadTemporaryDatabase(); @@ -778,10 +784,6 @@ void LocalServer::processConfig() server_display_name = getClientConfiguration().getString("display_name", ""); prompt_by_server_display_name = getClientConfiguration().getRawString("prompt_by_server_display_name.default", ":) "); - - global_context->setQueryKindInitial(); - global_context->setQueryKind(query_kind); - global_context->setQueryParameters(query_parameters); } @@ -860,6 +862,16 @@ void LocalServer::applyCmdOptions(ContextMutablePtr context) } +void LocalServer::createClientContext() +{ + /// In case of clickhouse-local it's necessary to use a separate context for client-related purposes. + /// We can't just change the global context because it is used in background tasks (for example, in merges) + /// which don't expect that the global context can suddenly change. 
+ client_context = Context::createCopy(global_context); + initClientContext(); +} + + void LocalServer::processOptions(const OptionsDescription &, const CommandLineOptions & options, const std::vector &, const std::vector &) { if (options.count("table")) diff --git a/programs/local/LocalServer.h b/programs/local/LocalServer.h index 0715f358313..ae9980311e1 100644 --- a/programs/local/LocalServer.h +++ b/programs/local/LocalServer.h @@ -31,7 +31,6 @@ public: int main(const std::vector & /*args*/) override; protected: - Poco::Util::LayeredConfiguration & getClientConfiguration() override; void connect() override; @@ -50,7 +49,6 @@ protected: void processConfig() override; void readArguments(int argc, char ** argv, Arguments & common_arguments, std::vector &, std::vector &) override; - void updateLoggerLevel(const String & logs_level) override; private: @@ -67,6 +65,8 @@ private: void applyCmdOptions(ContextMutablePtr context); void applyCmdSettings(ContextMutablePtr context); + void createClientContext(); + ServerSettings server_settings; std::optional status; diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 13dce05cabc..50cc6b98b81 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -467,7 +467,7 @@ void ClientBase::sendExternalTables(ASTPtr parsed_query) std::vector data; for (auto & table : external_tables) - data.emplace_back(table.getData(global_context)); + data.emplace_back(table.getData(client_context)); connection->sendExternalTablesData(data); } @@ -680,10 +680,10 @@ try /// intermixed with data with parallel formatting. /// It may increase code complexity significantly. if (!extras_into_stdout || select_only_into_file) - output_format = global_context->getOutputFormatParallelIfPossible( + output_format = client_context->getOutputFormatParallelIfPossible( current_format, out_file_buf ? *out_file_buf : *out_buf, block); else - output_format = global_context->getOutputFormat( + output_format = client_context->getOutputFormat( current_format, out_file_buf ? *out_file_buf : *out_buf, block); output_format->setAutoFlush(); @@ -762,6 +762,15 @@ void ClientBase::adjustSettings() global_context->setSettings(settings); } +void ClientBase::initClientContext() +{ + client_context->setClientName(std::string(DEFAULT_CLIENT_NAME)); + client_context->setQuotaClientKey(getClientConfiguration().getString("quota_key", "")); + client_context->setQueryKindInitial(); + client_context->setQueryKind(query_kind); + client_context->setQueryParameters(query_parameters); +} + bool ClientBase::isRegularFile(int fd) { struct stat file_stat; @@ -952,7 +961,7 @@ void ClientBase::processTextAsSingleQuery(const String & full_query) /// client-side. Thus we need to parse the query. const char * begin = full_query.data(); auto parsed_query = parseQuery(begin, begin + full_query.size(), - global_context->getSettingsRef(), + client_context->getSettingsRef(), /*allow_multi_statements=*/ false); if (!parsed_query) @@ -975,7 +984,7 @@ void ClientBase::processTextAsSingleQuery(const String & full_query) /// But for asynchronous inserts we don't extract data, because it's needed /// to be done on server side in that case (for coalescing the data from multiple inserts on server side). 
const auto * insert = parsed_query->as(); - if (insert && isSyncInsertWithData(*insert, global_context)) + if (insert && isSyncInsertWithData(*insert, client_context)) query_to_execute = full_query.substr(0, insert->data - full_query.data()); else query_to_execute = full_query; @@ -1093,7 +1102,7 @@ void ClientBase::processOrdinaryQuery(const String & query_to_execute, ASTPtr pa } } - const auto & settings = global_context->getSettingsRef(); + const auto & settings = client_context->getSettingsRef(); const Int32 signals_before_stop = settings.partial_result_on_first_cancel ? 2 : 1; int retries_left = 10; @@ -1108,10 +1117,10 @@ void ClientBase::processOrdinaryQuery(const String & query_to_execute, ASTPtr pa connection_parameters.timeouts, query, query_parameters, - global_context->getCurrentQueryId(), + client_context->getCurrentQueryId(), query_processing_stage, - &global_context->getSettingsRef(), - &global_context->getClientInfo(), + &client_context->getSettingsRef(), + &client_context->getClientInfo(), true, [&](const Progress & progress) { onProgress(progress); }); @@ -1298,7 +1307,7 @@ void ClientBase::onProgress(const Progress & value) void ClientBase::onTimezoneUpdate(const String & tz) { - global_context->setSetting("session_timezone", tz); + client_context->setSetting("session_timezone", tz); } @@ -1494,13 +1503,13 @@ bool ClientBase::receiveSampleBlock(Block & out, ColumnsDescription & columns_de void ClientBase::setInsertionTable(const ASTInsertQuery & insert_query) { - if (!global_context->hasInsertionTable() && insert_query.table) + if (!client_context->hasInsertionTable() && insert_query.table) { String table = insert_query.table->as().shortName(); if (!table.empty()) { String database = insert_query.database ? insert_query.database->as().shortName() : ""; - global_context->setInsertionTable(StorageID(database, table)); + client_context->setInsertionTable(StorageID(database, table)); } } } @@ -1551,7 +1560,7 @@ void ClientBase::processInsertQuery(const String & query_to_execute, ASTPtr pars const auto & parsed_insert_query = parsed_query->as(); if ((!parsed_insert_query.data && !parsed_insert_query.infile) && (is_interactive || (!stdin_is_a_tty && !isStdinNotEmptyAndValid(std_in)))) { - const auto & settings = global_context->getSettingsRef(); + const auto & settings = client_context->getSettingsRef(); if (settings.throw_if_no_data_to_insert) throw Exception(ErrorCodes::NO_DATA_TO_INSERT, "No data to insert"); else @@ -1565,10 +1574,10 @@ void ClientBase::processInsertQuery(const String & query_to_execute, ASTPtr pars connection_parameters.timeouts, query, query_parameters, - global_context->getCurrentQueryId(), + client_context->getCurrentQueryId(), query_processing_stage, - &global_context->getSettingsRef(), - &global_context->getClientInfo(), + &client_context->getSettingsRef(), + &client_context->getClientInfo(), true, [&](const Progress & progress) { onProgress(progress); }); @@ -1616,7 +1625,7 @@ void ClientBase::sendData(Block & sample, const ColumnsDescription & columns_des /// Set callback to be called on file progress. 
if (tty_buf) - progress_indication.setFileProgressCallback(global_context, *tty_buf); + progress_indication.setFileProgressCallback(client_context, *tty_buf); } /// If data fetched from file (maybe compressed file) @@ -1650,10 +1659,10 @@ void ClientBase::sendData(Block & sample, const ColumnsDescription & columns_des } StorageFile::CommonArguments args{ - WithContext(global_context), + WithContext(client_context), parsed_insert_query->table_id, current_format, - getFormatSettings(global_context), + getFormatSettings(client_context), compression_method, columns_for_storage_file, ConstraintsDescription{}, @@ -1661,7 +1670,7 @@ void ClientBase::sendData(Block & sample, const ColumnsDescription & columns_des {}, String{}, }; - StoragePtr storage = std::make_shared(in_file, global_context->getUserFilesPath(), args); + StoragePtr storage = std::make_shared(in_file, client_context->getUserFilesPath(), args); storage->startup(); SelectQueryInfo query_info; @@ -1672,16 +1681,16 @@ void ClientBase::sendData(Block & sample, const ColumnsDescription & columns_des storage->read( plan, sample.getNames(), - storage->getStorageSnapshot(metadata, global_context), + storage->getStorageSnapshot(metadata, client_context), query_info, - global_context, + client_context, {}, - global_context->getSettingsRef().max_block_size, + client_context->getSettingsRef().max_block_size, getNumberOfPhysicalCPUCores()); auto builder = plan.buildQueryPipeline( - QueryPlanOptimizationSettings::fromContext(global_context), - BuildQueryPipelineSettings::fromContext(global_context)); + QueryPlanOptimizationSettings::fromContext(client_context), + BuildQueryPipelineSettings::fromContext(client_context)); QueryPlanResourceHolder resources; auto pipe = QueryPipelineBuilder::getPipe(std::move(*builder), resources); @@ -1742,14 +1751,14 @@ void ClientBase::sendDataFrom(ReadBuffer & buf, Block & sample, const ColumnsDes current_format = insert->format; } - auto source = global_context->getInputFormat(current_format, buf, sample, insert_format_max_block_size); + auto source = client_context->getInputFormat(current_format, buf, sample, insert_format_max_block_size); Pipe pipe(source); if (columns_description.hasDefaults()) { pipe.addSimpleTransform([&](const Block & header) { - return std::make_shared(header, columns_description, *source, global_context); + return std::make_shared(header, columns_description, *source, client_context); }); } @@ -1911,12 +1920,12 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin if (is_interactive) { - global_context->setCurrentQueryId(""); + client_context->setCurrentQueryId(""); // Generate a new query_id for (const auto & query_id_format : query_id_formats) { writeString(query_id_format.first, std_out); - writeString(fmt::format(fmt::runtime(query_id_format.second), fmt::arg("query_id", global_context->getCurrentQueryId())), std_out); + writeString(fmt::format(fmt::runtime(query_id_format.second), fmt::arg("query_id", client_context->getCurrentQueryId())), std_out); writeChar('\n', std_out); std_out.next(); } @@ -1943,7 +1952,7 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin auto password = auth_data->getPassword(); if (password) - global_context->getAccessControl().checkPasswordComplexityRules(*password); + client_context->getAccessControl().checkPasswordComplexityRules(*password); } } } @@ -1958,15 +1967,15 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin std::optional old_settings; 
SCOPE_EXIT_SAFE({ if (old_settings) - global_context->setSettings(*old_settings); + client_context->setSettings(*old_settings); }); auto apply_query_settings = [&](const IAST & settings_ast) { if (!old_settings) - old_settings.emplace(global_context->getSettingsRef()); - global_context->applySettingsChanges(settings_ast.as()->changes); - global_context->resetSettingsToDefaultValue(settings_ast.as()->default_settings); + old_settings.emplace(client_context->getSettingsRef()); + client_context->applySettingsChanges(settings_ast.as()->changes); + client_context->resetSettingsToDefaultValue(settings_ast.as()->default_settings); }; const auto * insert = parsed_query->as(); @@ -1999,7 +2008,7 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin if (insert && insert->select) insert->tryFindInputFunction(input_function); - bool is_async_insert_with_inlined_data = global_context->getSettingsRef().async_insert && insert && insert->hasInlinedData(); + bool is_async_insert_with_inlined_data = client_context->getSettingsRef().async_insert && insert && insert->hasInlinedData(); if (is_async_insert_with_inlined_data) { @@ -2034,9 +2043,9 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin if (change.name == "profile") current_profile = change.value.safeGet(); else - global_context->applySettingChange(change); + client_context->applySettingChange(change); } - global_context->resetSettingsToDefaultValue(set_query->default_settings); + client_context->resetSettingsToDefaultValue(set_query->default_settings); /// Query parameters inside SET queries should be also saved on the client side /// to override their previous definitions set with --param_* arguments @@ -2044,7 +2053,7 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin for (const auto & [name, value] : set_query->query_parameters) query_parameters.insert_or_assign(name, value); - global_context->addQueryParameters(NameToNameMap{set_query->query_parameters.begin(), set_query->query_parameters.end()}); + client_context->addQueryParameters(NameToNameMap{set_query->query_parameters.begin(), set_query->query_parameters.end()}); } if (const auto * use_query = parsed_query->as()) { @@ -2121,8 +2130,8 @@ MultiQueryProcessingStage ClientBase::analyzeMultiQueryText( if (this_query_begin >= all_queries_end) return MultiQueryProcessingStage::QUERIES_END; - unsigned max_parser_depth = static_cast(global_context->getSettingsRef().max_parser_depth); - unsigned max_parser_backtracks = static_cast(global_context->getSettingsRef().max_parser_backtracks); + unsigned max_parser_depth = static_cast(client_context->getSettingsRef().max_parser_depth); + unsigned max_parser_backtracks = static_cast(client_context->getSettingsRef().max_parser_backtracks); // If there are only comments left until the end of file, we just // stop. The parser can't handle this situation because it always @@ -2142,7 +2151,7 @@ MultiQueryProcessingStage ClientBase::analyzeMultiQueryText( try { parsed_query = parseQuery(this_query_end, all_queries_end, - global_context->getSettingsRef(), + client_context->getSettingsRef(), /*allow_multi_statements=*/ true); } catch (const Exception & e) @@ -2185,7 +2194,7 @@ MultiQueryProcessingStage ClientBase::analyzeMultiQueryText( { this_query_end = find_first_symbols<'\n'>(insert_ast->data, all_queries_end); insert_ast->end = this_query_end; - query_to_execute_end = isSyncInsertWithData(*insert_ast, global_context) ? 
insert_ast->data : this_query_end; + query_to_execute_end = isSyncInsertWithData(*insert_ast, client_context) ? insert_ast->data : this_query_end; } query_to_execute = all_queries_text.substr(this_query_begin - all_queries_text.data(), query_to_execute_end - this_query_begin); @@ -2387,13 +2396,13 @@ bool ClientBase::executeMultiQuery(const String & all_queries_text) // , where the inline data is delimited by semicolon and not by a // newline. auto * insert_ast = parsed_query->as(); - if (insert_ast && isSyncInsertWithData(*insert_ast, global_context)) + if (insert_ast && isSyncInsertWithData(*insert_ast, client_context)) { this_query_end = insert_ast->end; adjustQueryEnd( this_query_end, all_queries_end, - static_cast(global_context->getSettingsRef().max_parser_depth), - static_cast(global_context->getSettingsRef().max_parser_backtracks)); + static_cast(client_context->getSettingsRef().max_parser_depth), + static_cast(client_context->getSettingsRef().max_parser_backtracks)); } // Report error. @@ -2523,10 +2532,10 @@ void ClientBase::runInteractive() if (load_suggestions) { /// Load suggestion data from the server. - if (global_context->getApplicationType() == Context::ApplicationType::CLIENT) - suggest->load(global_context, connection_parameters, getClientConfiguration().getInt("suggestion_limit"), wait_for_suggestions_to_load); - else if (global_context->getApplicationType() == Context::ApplicationType::LOCAL) - suggest->load(global_context, connection_parameters, getClientConfiguration().getInt("suggestion_limit"), wait_for_suggestions_to_load); + if (client_context->getApplicationType() == Context::ApplicationType::CLIENT) + suggest->load(client_context, connection_parameters, getClientConfiguration().getInt("suggestion_limit"), wait_for_suggestions_to_load); + else if (client_context->getApplicationType() == Context::ApplicationType::LOCAL) + suggest->load(client_context, connection_parameters, getClientConfiguration().getInt("suggestion_limit"), wait_for_suggestions_to_load); } if (home_path.empty()) @@ -2664,7 +2673,7 @@ void ClientBase::runInteractive() { // If a separate connection loading suggestions failed to open a new session, // use the main session to receive them. - suggest->load(*connection, connection_parameters.timeouts, getClientConfiguration().getInt("suggestion_limit"), global_context->getClientInfo()); + suggest->load(*connection, connection_parameters.timeouts, getClientConfiguration().getInt("suggestion_limit"), client_context->getClientInfo()); } try @@ -2713,10 +2722,10 @@ bool ClientBase::processMultiQueryFromFile(const String & file_name) if (!getClientConfiguration().has("log_comment")) { - Settings settings = global_context->getSettings(); + Settings settings = client_context->getSettings(); /// NOTE: cannot use even weakly_canonical() since it fails for /dev/stdin due to resolving of "pipe:[X]" settings.log_comment = fs::absolute(fs::path(file_name)); - global_context->setSettings(settings); + client_context->setSettings(settings); } return executeMultiQuery(queries_from_file); diff --git a/src/Client/ClientBase.h b/src/Client/ClientBase.h index 4f500a4c45d..be74090b84d 100644 --- a/src/Client/ClientBase.h +++ b/src/Client/ClientBase.h @@ -206,6 +206,9 @@ protected: /// Adjust some settings after command line options and config had been processed. void adjustSettings(); + /// Initializes the client context. 
+ void initClientContext(); + void setDefaultFormatsAndCompressionFromConfiguration(); void initTTYBuffer(ProgressOption progress); @@ -215,6 +218,9 @@ protected: SharedContextHolder shared_context; ContextMutablePtr global_context; + /// Client context is a context used only by the client to parse queries, process query parameters and to connect to clickhouse-server. + ContextMutablePtr client_context; + LoggerPtr fatal_log; Poco::AutoPtr fatal_channel_ptr; Poco::AutoPtr fatal_console_channel_ptr; From 3cf2ec36ca31964a5a57717d5645f5e5a287dd00 Mon Sep 17 00:00:00 2001 From: vdimir Date: Fri, 26 Jul 2024 11:36:54 +0000 Subject: [PATCH 257/661] Verbose output for 03203_client_benchmark_options --- .../03203_client_benchmark_options.sh | 27 ++++++++++++++----- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/tests/queries/0_stateless/03203_client_benchmark_options.sh b/tests/queries/0_stateless/03203_client_benchmark_options.sh index a9b9d69822b..475309cebb9 100755 --- a/tests/queries/0_stateless/03203_client_benchmark_options.sh +++ b/tests/queries/0_stateless/03203_client_benchmark_options.sh @@ -4,10 +4,23 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -${CLICKHOUSE_CLIENT} -t -q "SELECT sleepEachRow(2) FORMAT Null" 2>&1 | grep -q "^2\." && echo "Ok" || echo "Fail" -${CLICKHOUSE_CLIENT} --time -q "SELECT sleepEachRow(2) FORMAT Null" 2>&1 | grep -q "^2\." && echo "Ok" || echo "Fail" -${CLICKHOUSE_CLIENT} --memory-usage -q "SELECT sum(number) FROM numbers(10_000) FORMAT Null" 2>&1 | grep -q "^[0-9]\+$" && echo "Ok" || echo "Fail" -${CLICKHOUSE_CLIENT} --memory-usage=none -q "SELECT sum(number) FROM numbers(10_000) FORMAT Null" # expected no output -${CLICKHOUSE_CLIENT} --memory-usage=default -q "SELECT sum(number) FROM numbers(10_000) FORMAT Null" 2>&1 | grep -q "^[0-9]\+$" && echo "Ok" || echo "Fail" -${CLICKHOUSE_CLIENT} --memory-usage=readable -q "SELECT sum(number) FROM numbers(10_000) FORMAT Null" 2>&1 | grep -q "^[0-9].*B$" && echo "Ok" || echo "Fail" -${CLICKHOUSE_CLIENT} --memory-usage=unknown -q "SELECT sum(number) FROM numbers(10_000) FORMAT Null" 2>&1 | grep -q "BAD_ARGUMENTS" && echo "Ok" || echo "Fail" +output=$(${CLICKHOUSE_CLIENT} -t -q "SELECT sleepEachRow(2) FORMAT Null" 2>&1) +echo "$output" | grep -q "^2\." && echo "Ok" || { echo "Fail"; echo "$output"; } + +output=$(${CLICKHOUSE_CLIENT} --time -q "SELECT sleepEachRow(2) FORMAT Null" 2>&1) +echo "$output" | grep -q "^2\." 
&& echo "Ok" || { echo "Fail"; echo "$output"; } + +output=$(${CLICKHOUSE_CLIENT} --memory-usage -q "SELECT sum(number) FROM numbers(10_000) FORMAT Null" 2>&1) +echo "$output" | grep -q "^[0-9]\+$" && echo "Ok" || { echo "Fail"; echo "$output"; } + +output=$(${CLICKHOUSE_CLIENT} --memory-usage=none -q "SELECT sum(number) FROM numbers(10_000) FORMAT Null" 2>&1) +echo -n "$output" # expected no output + +output=$(${CLICKHOUSE_CLIENT} --memory-usage=default -q "SELECT sum(number) FROM numbers(10_000) FORMAT Null" 2>&1) +echo "$output" | grep -q "^[0-9]\+$" && echo "Ok" || { echo "Fail"; echo "$output"; } + +output=$(${CLICKHOUSE_CLIENT} --memory-usage=readable -q "SELECT sum(number) FROM numbers(10_000) FORMAT Null" 2>&1) +echo "$output" | grep -q "^[0-9].*B$" && echo "Ok" || { echo "Fail"; echo "$output"; } + +output=$(${CLICKHOUSE_CLIENT} --memory-usage=unknown -q "SELECT sum(number) FROM numbers(10_000) FORMAT Null" 2>&1) +echo "$output" | grep -q "BAD_ARGUMENTS" && echo "Ok" || { echo "Fail"; echo "$output"; } From 28e991708be1facd87c3760f7929cd5ddc299805 Mon Sep 17 00:00:00 2001 From: maxvostrikov Date: Fri, 26 Jul 2024 13:45:57 +0200 Subject: [PATCH 258/661] squash! added somme tests in relation with https://github.com/ClickHouse/ClickHouse/pull/54881 with new behaviour when enable_named_columns_in_function_tuple=1 (default value) --- .../0_stateless/00309_formats.reference | Bin 18666 -> 20353 bytes tests/queries/0_stateless/00309_formats.sql | 5 ----- 2 files changed, 5 deletions(-) diff --git a/tests/queries/0_stateless/00309_formats.reference b/tests/queries/0_stateless/00309_formats.reference index f3ea45520bb50fb936caf6724e9fedf3cdd00b75..cab311692be229716b58af39079275d3942b01cc 100644 GIT binary patch literal 20353 zcmeHP3wIP%5{6aK9zDL_&p18;axq=E-!KeHeM!>@7a6Ht;zJ#Jk0d+*gt^JbdYKDu4%*X9d$a^ZOYUl#FBHF2yr}q<;@67HiYtm^#Z|>iir*-% zDXuF{6fY}&tN5Mb_lj2(e^C5U@h8Qria#snd+xpO{s$g>=;22meeCfko}78A|MWA@J~z8>|MLgJ!58LIidkNiRoxsq zJUB8u`r@(UFP(V#6D$6wrgio0oJnBji=r;A-&v0K-- z%ZqMZ-|En<>suYVb$zQtx30$|*-G(uTzhnP-g&=^d;Asr39~@AuK#;~vDmn-A16x* zR`tnxBCGmdyM|AAwVs$&ebTP#ldV?ulqNAv(}O7;BueJfKp}=K;v>=KamanSU6UWrDN?_JXVk8gMI`72?`P<#P@i7&<`pS zWF+WF5R#xIK}v#_1ThI}667T4Nf4BvC_z$!rUX$5suE-+=t>Zlpe#XJg0=*43F;E$ zCFn~Kn4mC0VuHp5kqIgjWG3iL5SpMgL28251hENf6XYi7O%R-*I6-oP<^<6RsuN@< z=uQxxpgci(g7yUQ3F;H%C+JUb0Ko$U7Z7|va00;#1UC@;KyU=X69iWfd_iyq!5aj3 z5d48~#s?7ye?<5r!XFX-i10^*KO+1Q;g1M^M8t7~KO+1Q;g1M^MEE1Z9})hD9M2rk z64B_#G0j2EQO#k^am|4Ze?<5r!XFX-i0}tWF!-P!{1M@g2!BNQBf=jM{)q5Lgg+wu z5#f&re?<5r!XFX-i10^*KO+1Q;g1M^#G){NzzgHs7A`ETZCepq_933?i~U2rWWJXqI8KuOcyVRaKQz-H_k$cyu3VDiwye}g+f_bcJ1@D;TzuN5 zD@s+avnf(-W3}#V+EUgwouv-u47MnGiFGeI6M7J0X>pSftOBKaoh3fnm@GvczL`Ta z^L_N{6Fw0i&Ve~T1wJJ{6+SgSO&=q%!s~pgh07?nk!+)EGuh^{Eo58Dw&Hr*^k(>Z zybQ>^Hqtrc{32`%3@;;YAm1kG-$5QXD0j)XNqnD@?1}G`_*RMUmH1|f@0J>DqOcTU z*dnqtku4%yM7D@*5!oWLMaC8xTV$doTV!mJu|>uf8Czs*k+DV27CBqwY>|unY>~4? 
z&K5aasMw-ni;68Owy4;mVvC9`Dz>QDqGpSlEo!!?*`j8Pnk{O!sM(@si<&JOwrJR* zVT*<>8n$TIqG5}MEgH6H*a8(t*|H6)jlxDpGfP)fUuE8&1TrL@Sn5)r6Ya*MU7eL0wb!Wn76AR5ztX z#+6_}jZ<1=T!|M{I=Mw62Gvg4mvJR%Q1g@)8CT*4l}~AraV2z6|Kt`49@Ic(U&fXA zK_ygLWLya$)I+&Nf(SKH*_Ux8j!+qu78zGU3H4EFk#QxOP$lISi6>M_WnadXkV3sw zT4Y>_DpXCSg~!FbC|e#E)l=EBEh?z8Wm{BHWy|BDk}6xVZKpC4TZt>wRQasL73!+8 zFSZg_sIE$j*tQdl#8s5ILYJI}Nejl>nIuG~}N3YAyc7h8!d)L*4V z#+A526;@hgT!|~xV!1`)3iVjom;05tLRD5;5#VP`KA{>G~DrX+|J4NE4^tt78W7My1Ewbk!9(?&_}d~o*M=)&sS=v zm5YR#+gx;@nz8VMIrYYX7g?<00RC^)91khQIPJmG@OrgrFt^k;LYv_Lo5d@`!DxB3 zGDLsD_K3_;G)I{^%FR(>j!JV>nWNeqHPfRx<;2)BT_0Y1Xf&A9h!Y`)0|nvDH@x8n zHr{gEJg2=CS+6-2Y@=}a^V*~wFA#5d<+f6RtRKlbl5c8*WMo}Q)|X_RN!FWW-AUK{ zDc6tAH={hZdv7)Rc=}3mQ`+9<(5@$S>={hVObTHRbg2bmQ`U{6_yqMxorr-!m=tXtHQD> zEvwS9DlMzhvMMdB(y}TotJ1P6EvwS9sw}I@vZ^er%Cf2~tID#fEUU`0sw}I@vZ^hs z+On!GtJ<=vEvwqHsx7P9vZ^hs+OldatH!cwEUU({YAmb9vT7`=#}TLyo>A3TXJ|u*PXZY@b0cX?~)z)dEQU-ex~gv+M8t>k(P&*cKVkL~(9{aiZ0vklwdB?P?7?an*> zTu#9I-mbsX&!q*_8f<@;81VeWop<`V+<^DVU4N&aOAmPG-1T?*xeS5#(rtg2Bk;7w zop<`VG=X>8U4N&a%M^I;-Sv0+xnzOoK(@b27kFphop<`VjDh#+U4N&aOB#6h-u8EC z1J9b=d8ePt9C&Zv^>_NY_NY{DCT;>+ke)`2)`|ZGV?P@Y@r2-syKefBb6wVB0s=_`E`L+6m)Ezc_5xV{`a(e6A7cfwpXiGp`$XFl3xq_$>F>d{|a@uQ~ Lvprlx%YOd>&)TR8 delta 3052 zcmeIy$4}c(00wZIgtksdZ-w^I3JD}eL2?j6{R4XG1$t-?y;Q0c4wb5?2YL>Bj|8*# z-Xp>6y?59U_TGDMNaN%a?SDX3#j^bK`<|bk?ZfAlN^UDiNE{JVHgaD5e8+19DxExLIy^` zXcz-yVH}Ky2`~{R!DN^MQ}xOSA`L-0%z&9N3ueO{m<#h@J}iKRum~2z5?IQq+n-P( zP{A_D!g5#vD`6F^hBdGj*1>w%02^VGPHumqnY-wY76U{pY=iBv19rkL*bRGNFYJT; zPz?v*AoeGQ5Ddc+I10z$IGlhQI0>iVG@OC6a1PGH1;h6f#3F(vxC~d|DqMr>a070_ zEw~ML;4a*Q`|v>j|8;{y1V``~p1@Oh2G8LIyo6Wq8s5NLcn9yHg%&qjk~HD*O`kk{ zMhS#wBF4@26;n)ok$HM~`}q3#2LuKMhlGZON5~bCOjLACY+QUoVp4KSYFc_mW>$7i zZeD&tVNr2OsZv$OmRD3(RoB$k)i*RYHMg|3wRd!Ob@%l4^{WR4hlWQ+$Hpf#lT*_( zvvczci%ZKZt842Un_JsEyL2uP zs~KC(*lPbltFhlLRQ$(oSyIc$t;|{F>ph;XVgHfpC$+5H%8ajnp%+Y<&DY2OORU_k z(V7&oZmv$Gj7LgIiK)l0pFiA9KYx&VyyL(B=HYJsi3@C{mioxSQew%acGgGsx>HI? 
tDE$HfH|yd|adXt)D_oo(mI%Z& reinterpretAsString(x), arr)) AS s, (n, d) AS tuple FROM system.numbers LIMIT 2 FORMAT JSON; SELECT number * 246 + 10 AS n, toDate('2000-01-01') + n AS d, range(n) AS arr, arrayStringConcat(arrayMap(x -> reinterpretAsString(x), arr)) AS s, (n, d) AS tuple FROM system.numbers LIMIT 2 FORMAT JSONCompact; SELECT number * 246 + 10 AS n, toDate('2000-01-01') + n AS d, range(n) AS arr, arrayStringConcat(arrayMap(x -> reinterpretAsString(x), arr)) AS s, (n, d) AS tuple FROM system.numbers LIMIT 2 FORMAT XML; - -SET enable_named_columns_in_function_tuple = 1; - -SELECT 36 AS n, toDate('2000-01-01') + n AS d, (n, d) AS tuple FROM system.numbers LIMIT 1 FORMAT RowBinaryWithNamesAndTypes; -SELECT number * 246 + 10 AS n, toDate('2000-01-01') + n AS d, (n, d) AS tuple FROM system.numbers LIMIT 1 FORMAT TabSeparatedWithNamesAndTypes; From d041df80aa112920f28d74ed26a0c8381808dafc Mon Sep 17 00:00:00 2001 From: avogar Date: Fri, 26 Jul 2024 12:14:26 +0000 Subject: [PATCH 259/661] Add test --- ..._to_read_for_schema_inference_in_cache.reference | 2 ++ ...x_bytes_to_read_for_schema_inference_in_cache.sh | 13 +++++++++++++ 2 files changed, 15 insertions(+) create mode 100644 tests/queries/0_stateless/03212_max_bytes_to_read_for_schema_inference_in_cache.reference create mode 100755 tests/queries/0_stateless/03212_max_bytes_to_read_for_schema_inference_in_cache.sh diff --git a/tests/queries/0_stateless/03212_max_bytes_to_read_for_schema_inference_in_cache.reference b/tests/queries/0_stateless/03212_max_bytes_to_read_for_schema_inference_in_cache.reference new file mode 100644 index 00000000000..cd109daac52 --- /dev/null +++ b/tests/queries/0_stateless/03212_max_bytes_to_read_for_schema_inference_in_cache.reference @@ -0,0 +1,2 @@ +x Nullable(Int64) +schema_inference_hints=, max_rows_to_read_for_schema_inference=25000, max_bytes_to_read_for_schema_inference=1000, schema_inference_make_columns_nullable=true, try_infer_integers=true, try_infer_dates=true, try_infer_datetimes=true, try_infer_numbers_from_strings=false, read_bools_as_numbers=true, read_bools_as_strings=true, read_objects_as_strings=true, read_numbers_as_strings=true, read_arrays_as_strings=true, try_infer_objects_as_tuples=true, infer_incomplete_types_as_strings=true, try_infer_objects=false, use_string_type_for_ambiguous_paths_in_named_tuples_inference_from_objects=false diff --git a/tests/queries/0_stateless/03212_max_bytes_to_read_for_schema_inference_in_cache.sh b/tests/queries/0_stateless/03212_max_bytes_to_read_for_schema_inference_in_cache.sh new file mode 100755 index 00000000000..8a77538f592 --- /dev/null +++ b/tests/queries/0_stateless/03212_max_bytes_to_read_for_schema_inference_in_cache.sh @@ -0,0 +1,13 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +echo '{"x" : 42}' > $CLICKHOUSE_TEST_UNIQUE_NAME.json +$CLICKHOUSE_LOCAL -nm -q " +DESC file('$CLICKHOUSE_TEST_UNIQUE_NAME.json') SETTINGS input_format_max_bytes_to_read_for_schema_inference=1000; +SELECT additional_format_info from system.schema_inference_cache" + +rm $CLICKHOUSE_TEST_UNIQUE_NAME.json + From 0299475202b59a4d1a54f13f02b7cc9ff44f38cc Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Fri, 26 Jul 2024 14:02:37 +0100 Subject: [PATCH 260/661] impl --- ...eplicas_join_algo_and_analyzer_1.reference | 30 +++++ ...allel_replicas_join_algo_and_analyzer_1.sh | 51 ++++++++ ...eplicas_join_algo_and_analyzer_2.reference | 57 +++++++++ ...allel_replicas_join_algo_and_analyzer_2.sh | 103 +++++++++++++++ ...plicas_join_algo_and_analyzer_3.reference} | 87 ------------- ...llel_replicas_join_algo_and_analyzer_3.sh} | 119 ------------------ 6 files changed, 241 insertions(+), 206 deletions(-) create mode 100644 tests/queries/0_stateless/02967_parallel_replicas_join_algo_and_analyzer_1.reference create mode 100755 tests/queries/0_stateless/02967_parallel_replicas_join_algo_and_analyzer_1.sh create mode 100644 tests/queries/0_stateless/02967_parallel_replicas_join_algo_and_analyzer_2.reference create mode 100755 tests/queries/0_stateless/02967_parallel_replicas_join_algo_and_analyzer_2.sh rename tests/queries/0_stateless/{02967_parallel_replicas_join_algo_and_analyzer.reference => 02967_parallel_replicas_join_algo_and_analyzer_3.reference} (55%) rename tests/queries/0_stateless/{02967_parallel_replicas_join_algo_and_analyzer.sh => 02967_parallel_replicas_join_algo_and_analyzer_3.sh} (58%) diff --git a/tests/queries/0_stateless/02967_parallel_replicas_join_algo_and_analyzer_1.reference b/tests/queries/0_stateless/02967_parallel_replicas_join_algo_and_analyzer_1.reference new file mode 100644 index 00000000000..e1bf9c27a81 --- /dev/null +++ b/tests/queries/0_stateless/02967_parallel_replicas_join_algo_and_analyzer_1.reference @@ -0,0 +1,30 @@ + +simple join with analyzer +4200000 4200000 4200000 -1400000 +4200006 4200006 4200006 -1400002 +4200012 4200012 4200012 -1400004 +4200018 4200018 4200018 -1400006 +4200024 4200024 4200024 -1400008 +4200030 4200030 4200030 -1400010 +4200036 4200036 4200036 -1400012 +4200042 4200042 4200042 -1400014 +4200048 4200048 4200048 -1400016 +4200054 4200054 4200054 -1400018 + +simple (global) join with analyzer and parallel replicas +4200000 4200000 4200000 -1400000 +4200006 4200006 4200006 -1400002 +4200012 4200012 4200012 -1400004 +4200018 4200018 4200018 -1400006 +4200024 4200024 4200024 -1400008 +4200030 4200030 4200030 -1400010 +4200036 4200036 4200036 -1400012 +4200042 4200042 4200042 -1400014 +4200048 4200048 4200048 -1400016 +4200054 4200054 4200054 -1400018 +SELECT `__table1`.`key` AS `key`, `__table1`.`value` AS `value` FROM `default`.`num_2` AS `__table1` (stage: WithMergeableState) +SELECT `__table1`.`key` AS `key`, `__table1`.`value` AS `value` FROM `default`.`num_2` AS `__table1` (stage: WithMergeableState) + DefaultCoordinator: Coordination done +SELECT `__table1`.`key` AS `key`, `__table1`.`value` AS `value`, `__table3`.`key` AS `r.key`, `__table3`.`value` AS `r.value` FROM (SELECT `__table2`.`key` AS `key`, `__table2`.`value` AS `value` FROM `default`.`num_1` AS `__table2`) AS `__table1` GLOBAL ALL INNER JOIN `_data_` AS `__table3` ON `__table1`.`key` = `__table3`.`key` ORDER BY `__table1`.`key` ASC LIMIT _CAST(700000, 'UInt64'), _CAST(10, 'UInt64') (stage: WithMergeableState) +SELECT `__table1`.`key` AS `key`, 
`__table1`.`value` AS `value`, `__table3`.`key` AS `r.key`, `__table3`.`value` AS `r.value` FROM (SELECT `__table2`.`key` AS `key`, `__table2`.`value` AS `value` FROM `default`.`num_1` AS `__table2`) AS `__table1` GLOBAL ALL INNER JOIN `_data_` AS `__table3` ON `__table1`.`key` = `__table3`.`key` ORDER BY `__table1`.`key` ASC LIMIT _CAST(700000, 'UInt64'), _CAST(10, 'UInt64') (stage: WithMergeableState) + DefaultCoordinator: Coordination done diff --git a/tests/queries/0_stateless/02967_parallel_replicas_join_algo_and_analyzer_1.sh b/tests/queries/0_stateless/02967_parallel_replicas_join_algo_and_analyzer_1.sh new file mode 100755 index 00000000000..1089eb4051f --- /dev/null +++ b/tests/queries/0_stateless/02967_parallel_replicas_join_algo_and_analyzer_1.sh @@ -0,0 +1,51 @@ +#!/usr/bin/env bash +# Tags: long, no-random-settings, no-random-merge-tree-settings + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + + +$CLICKHOUSE_CLIENT -nm -q " +drop table if exists num_1; +drop table if exists num_2; + +create table num_1 (key UInt64, value String) engine = MergeTree order by key; +create table num_2 (key UInt64, value Int64) engine = MergeTree order by key; + +insert into num_1 select number * 2, toString(number * 2) from numbers(1e7); +insert into num_2 select number * 3, -number from numbers(1.5e6); +" + +############## +echo +echo "simple join with analyzer" + +$CLICKHOUSE_CLIENT -q " +select * from (select key, value from num_1) l +inner join (select key, value from num_2) r on l.key = r.key +order by l.key limit 10 offset 700000 +SETTINGS allow_experimental_analyzer=1" + +############## +echo +echo "simple (global) join with analyzer and parallel replicas" + +$CLICKHOUSE_CLIENT -q " +select * from (select key, value from num_1) l +inner join (select key, value from num_2) r on l.key = r.key +order by l.key limit 10 offset 700000 +SETTINGS allow_experimental_analyzer=1, allow_experimental_parallel_reading_from_replicas = 2, +max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, +cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', parallel_replicas_prefer_local_join=0" + +$CLICKHOUSE_CLIENT -q " +select * from (select key, value from num_1) l +inner join (select key, value from num_2) r on l.key = r.key +order by l.key limit 10 offset 700000 +SETTINGS allow_experimental_analyzer=1, allow_experimental_parallel_reading_from_replicas = 2, send_logs_level='trace', +max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, +cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', parallel_replicas_prefer_local_join=0" 2>&1 | +grep "executeQuery\|.*Coordinator: Coordination done" | +grep -o "SELECT.*WithMergeableState)\|.*Coordinator: Coordination done" | +sed -re 's/_data_[[:digit:]]+_[[:digit:]]+/_data_/g' diff --git a/tests/queries/0_stateless/02967_parallel_replicas_join_algo_and_analyzer_2.reference b/tests/queries/0_stateless/02967_parallel_replicas_join_algo_and_analyzer_2.reference new file mode 100644 index 00000000000..297ec311f3e --- /dev/null +++ b/tests/queries/0_stateless/02967_parallel_replicas_join_algo_and_analyzer_2.reference @@ -0,0 +1,57 @@ + +simple (local) join with analyzer and parallel replicas +4200000 4200000 4200000 -1400000 +4200006 4200006 4200006 -1400002 +4200012 4200012 4200012 -1400004 +4200018 4200018 4200018 -1400006 +4200024 4200024 4200024 -1400008 +4200030 4200030 4200030 
-1400010 +4200036 4200036 4200036 -1400012 +4200042 4200042 4200042 -1400014 +4200048 4200048 4200048 -1400016 +4200054 4200054 4200054 -1400018 +SELECT `__table1`.`key` AS `key`, `__table1`.`value` AS `value`, `__table3`.`key` AS `r.key`, `__table3`.`value` AS `r.value` FROM (SELECT `__table2`.`key` AS `key`, `__table2`.`value` AS `value` FROM `default`.`num_1` AS `__table2`) AS `__table1` ALL INNER JOIN (SELECT `__table4`.`key` AS `key`, `__table4`.`value` AS `value` FROM `default`.`num_2` AS `__table4`) AS `__table3` ON `__table1`.`key` = `__table3`.`key` ORDER BY `__table1`.`key` ASC LIMIT _CAST(700000, 'UInt64'), _CAST(10, 'UInt64') (stage: WithMergeableState) +SELECT `__table1`.`key` AS `key`, `__table1`.`value` AS `value`, `__table3`.`key` AS `r.key`, `__table3`.`value` AS `r.value` FROM (SELECT `__table2`.`key` AS `key`, `__table2`.`value` AS `value` FROM `default`.`num_1` AS `__table2`) AS `__table1` ALL INNER JOIN (SELECT `__table4`.`key` AS `key`, `__table4`.`value` AS `value` FROM `default`.`num_2` AS `__table4`) AS `__table3` ON `__table1`.`key` = `__table3`.`key` ORDER BY `__table1`.`key` ASC LIMIT _CAST(700000, 'UInt64'), _CAST(10, 'UInt64') (stage: WithMergeableState) + DefaultCoordinator: Coordination done + +simple (local) join with analyzer and parallel replicas and full sorting merge join +4200000 4200000 4200000 -1400000 +4200006 4200006 4200006 -1400002 +4200012 4200012 4200012 -1400004 +4200018 4200018 4200018 -1400006 +4200024 4200024 4200024 -1400008 +4200030 4200030 4200030 -1400010 +4200036 4200036 4200036 -1400012 +4200042 4200042 4200042 -1400014 +4200048 4200048 4200048 -1400016 +4200054 4200054 4200054 -1400018 +SELECT `__table1`.`key` AS `key`, `__table1`.`value` AS `value`, `__table3`.`key` AS `r.key`, `__table3`.`value` AS `r.value` FROM (SELECT `__table2`.`key` AS `key`, `__table2`.`value` AS `value` FROM `default`.`num_1` AS `__table2`) AS `__table1` ALL INNER JOIN (SELECT `__table4`.`key` AS `key`, `__table4`.`value` AS `value` FROM `default`.`num_2` AS `__table4`) AS `__table3` ON `__table1`.`key` = `__table3`.`key` ORDER BY `__table1`.`key` ASC LIMIT _CAST(700000, 'UInt64'), _CAST(10, 'UInt64') (stage: WithMergeableState) +SELECT `__table1`.`key` AS `key`, `__table1`.`value` AS `value`, `__table3`.`key` AS `r.key`, `__table3`.`value` AS `r.value` FROM (SELECT `__table2`.`key` AS `key`, `__table2`.`value` AS `value` FROM `default`.`num_1` AS `__table2`) AS `__table1` ALL INNER JOIN (SELECT `__table4`.`key` AS `key`, `__table4`.`value` AS `value` FROM `default`.`num_2` AS `__table4`) AS `__table3` ON `__table1`.`key` = `__table3`.`key` ORDER BY `__table1`.`key` ASC LIMIT _CAST(700000, 'UInt64'), _CAST(10, 'UInt64') (stage: WithMergeableState) + WithOrderCoordinator: Coordination done + +nested join with analyzer +420000 420000 420000 -140000 +420042 420042 420042 -140014 +420084 420084 420084 -140028 +420126 420126 420126 -140042 +420168 420168 420168 -140056 +420210 420210 420210 -140070 +420252 420252 420252 -140084 +420294 420294 420294 -140098 +420336 420336 420336 -140112 +420378 420378 420378 -140126 + +nested join with analyzer and parallel replicas, both local +420000 420000 420000 -140000 +420042 420042 420042 -140014 +420084 420084 420084 -140028 +420126 420126 420126 -140042 +420168 420168 420168 -140056 +420210 420210 420210 -140070 +420252 420252 420252 -140084 +420294 420294 420294 -140098 +420336 420336 420336 -140112 +420378 420378 420378 -140126 +SELECT `__table1`.`key` AS `key`, `__table1`.`value` AS `value`, `__table3`.`key` AS 
`r.key`, `__table3`.`value` AS `r.value` FROM (SELECT `__table2`.`key` AS `key`, `__table2`.`value` AS `value` FROM `default`.`num_1` AS `__table2`) AS `__table1` ALL INNER JOIN (SELECT `__table4`.`key` AS `key`, `__table4`.`value` AS `value` FROM `default`.`num_2` AS `__table4` ALL INNER JOIN (SELECT `__table6`.`number` * 7 AS `key` FROM numbers(100000.) AS `__table6`) AS `__table5` ON `__table4`.`key` = `__table5`.`key` SETTINGS parallel_replicas_prefer_local_join = 1) AS `__table3` ON `__table1`.`key` = `__table3`.`key` ORDER BY `__table1`.`key` ASC LIMIT _CAST(10000, 'UInt64'), _CAST(10, 'UInt64') (stage: WithMergeableState) +SELECT `__table1`.`key` AS `key`, `__table1`.`value` AS `value`, `__table3`.`key` AS `r.key`, `__table3`.`value` AS `r.value` FROM (SELECT `__table2`.`key` AS `key`, `__table2`.`value` AS `value` FROM `default`.`num_1` AS `__table2`) AS `__table1` ALL INNER JOIN (SELECT `__table4`.`key` AS `key`, `__table4`.`value` AS `value` FROM `default`.`num_2` AS `__table4` ALL INNER JOIN (SELECT `__table6`.`number` * 7 AS `key` FROM numbers(100000.) AS `__table6`) AS `__table5` ON `__table4`.`key` = `__table5`.`key` SETTINGS parallel_replicas_prefer_local_join = 1) AS `__table3` ON `__table1`.`key` = `__table3`.`key` ORDER BY `__table1`.`key` ASC LIMIT _CAST(10000, 'UInt64'), _CAST(10, 'UInt64') (stage: WithMergeableState) + WithOrderCoordinator: Coordination done diff --git a/tests/queries/0_stateless/02967_parallel_replicas_join_algo_and_analyzer_2.sh b/tests/queries/0_stateless/02967_parallel_replicas_join_algo_and_analyzer_2.sh new file mode 100755 index 00000000000..7a0e2d9bfdb --- /dev/null +++ b/tests/queries/0_stateless/02967_parallel_replicas_join_algo_and_analyzer_2.sh @@ -0,0 +1,103 @@ +#!/usr/bin/env bash +# Tags: long, no-random-settings, no-random-merge-tree-settings + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + + +$CLICKHOUSE_CLIENT -nm -q " +drop table if exists num_1; +drop table if exists num_2; + +create table num_1 (key UInt64, value String) engine = MergeTree order by key; +create table num_2 (key UInt64, value Int64) engine = MergeTree order by key; + +insert into num_1 select number * 2, toString(number * 2) from numbers(1e7); +insert into num_2 select number * 3, -number from numbers(1.5e6); +" + +############## +echo +echo "simple (local) join with analyzer and parallel replicas" + +$CLICKHOUSE_CLIENT -q " +select * from (select key, value from num_1) l +inner join (select key, value from num_2) r on l.key = r.key +order by l.key limit 10 offset 700000 +SETTINGS allow_experimental_analyzer=1, +allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, +cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', parallel_replicas_prefer_local_join=1" + +$CLICKHOUSE_CLIENT -q " +select * from (select key, value from num_1) l +inner join (select key, value from num_2) r on l.key = r.key +order by l.key limit 10 offset 700000 +SETTINGS allow_experimental_analyzer=1, send_logs_level='trace', +allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, +cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', parallel_replicas_prefer_local_join=1" 2>&1 | +grep "executeQuery\|.*Coordinator: Coordination done" | +grep -o "SELECT.*WithMergeableState)\|.*Coordinator: Coordination done" | +sed -re 's/_data_[[:digit:]]+_[[:digit:]]+/_data_/g' + + +############## +echo +echo "simple (local) join with analyzer and parallel replicas and full sorting merge join" + +$CLICKHOUSE_CLIENT -q " +select * from (select key, value from num_1) l +inner join (select key, value from num_2) r on l.key = r.key +order by l.key limit 10 offset 700000 +SETTINGS allow_experimental_analyzer=1, join_algorithm='full_sorting_merge', +allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, +cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', parallel_replicas_prefer_local_join=1" + +$CLICKHOUSE_CLIENT -q " +select * from (select key, value from num_1) l +inner join (select key, value from num_2) r on l.key = r.key +order by l.key limit 10 offset 700000 +SETTINGS allow_experimental_analyzer=1, join_algorithm='full_sorting_merge', send_logs_level='trace', +allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, +cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', parallel_replicas_prefer_local_join=1" 2>&1 | +grep "executeQuery\|.*Coordinator: Coordination done" | +grep -o "SELECT.*WithMergeableState)\|.*Coordinator: Coordination done" | +sed -re 's/_data_[[:digit:]]+_[[:digit:]]+/_data_/g' + + +############## +echo +echo "nested join with analyzer" + +$CLICKHOUSE_CLIENT -q " +select * from (select key, value from num_1) l +inner join (select key, value from num_2 inner join + (select number * 7 as key from numbers(1e5)) as nn on num_2.key = nn.key settings parallel_replicas_prefer_local_join=1) r +on l.key = r.key order by l.key limit 10 offset 10000 +SETTINGS allow_experimental_analyzer=1" + + +############## +echo +echo "nested join with analyzer and parallel replicas, both local" 
+ +$CLICKHOUSE_CLIENT -q " +select * from (select key, value from num_1) l +inner join (select key, value from num_2 inner join + (select number * 7 as key from numbers(1e5)) as nn on num_2.key = nn.key settings parallel_replicas_prefer_local_join=1) r +on l.key = r.key order by l.key limit 10 offset 10000 +SETTINGS allow_experimental_analyzer=1, +allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, +cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', parallel_replicas_prefer_local_join=1" + +$CLICKHOUSE_CLIENT -q " +select * from (select key, value from num_1) l +inner join (select key, value from num_2 inner join + (select number * 7 as key from numbers(1e5)) as nn on num_2.key = nn.key settings parallel_replicas_prefer_local_join=1) r +on l.key = r.key order by l.key limit 10 offset 10000 +SETTINGS allow_experimental_analyzer=1, join_algorithm='full_sorting_merge', send_logs_level='trace', +allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, +cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', parallel_replicas_prefer_local_join=1" 2>&1 | +grep "executeQuery\|.*Coordinator: Coordination done" | +grep -o "SELECT.*WithMergeableState)\|.*Coordinator: Coordination done" | +sed -re 's/_data_[[:digit:]]+_[[:digit:]]+/_data_/g' diff --git a/tests/queries/0_stateless/02967_parallel_replicas_join_algo_and_analyzer.reference b/tests/queries/0_stateless/02967_parallel_replicas_join_algo_and_analyzer_3.reference similarity index 55% rename from tests/queries/0_stateless/02967_parallel_replicas_join_algo_and_analyzer.reference rename to tests/queries/0_stateless/02967_parallel_replicas_join_algo_and_analyzer_3.reference index d7fa419aeab..c0485b817c4 100644 --- a/tests/queries/0_stateless/02967_parallel_replicas_join_algo_and_analyzer.reference +++ b/tests/queries/0_stateless/02967_parallel_replicas_join_algo_and_analyzer_3.reference @@ -1,91 +1,4 @@ -simple join with analyzer -4200000 4200000 4200000 -1400000 -4200006 4200006 4200006 -1400002 -4200012 4200012 4200012 -1400004 -4200018 4200018 4200018 -1400006 -4200024 4200024 4200024 -1400008 -4200030 4200030 4200030 -1400010 -4200036 4200036 4200036 -1400012 -4200042 4200042 4200042 -1400014 -4200048 4200048 4200048 -1400016 -4200054 4200054 4200054 -1400018 - -simple (global) join with analyzer and parallel replicas -4200000 4200000 4200000 -1400000 -4200006 4200006 4200006 -1400002 -4200012 4200012 4200012 -1400004 -4200018 4200018 4200018 -1400006 -4200024 4200024 4200024 -1400008 -4200030 4200030 4200030 -1400010 -4200036 4200036 4200036 -1400012 -4200042 4200042 4200042 -1400014 -4200048 4200048 4200048 -1400016 -4200054 4200054 4200054 -1400018 -SELECT `__table1`.`key` AS `key`, `__table1`.`value` AS `value` FROM `default`.`num_2` AS `__table1` (stage: WithMergeableState) -SELECT `__table1`.`key` AS `key`, `__table1`.`value` AS `value` FROM `default`.`num_2` AS `__table1` (stage: WithMergeableState) - DefaultCoordinator: Coordination done -SELECT `__table1`.`key` AS `key`, `__table1`.`value` AS `value`, `__table3`.`key` AS `r.key`, `__table3`.`value` AS `r.value` FROM (SELECT `__table2`.`key` AS `key`, `__table2`.`value` AS `value` FROM `default`.`num_1` AS `__table2`) AS `__table1` GLOBAL ALL INNER JOIN `_data_` AS `__table3` ON `__table1`.`key` = `__table3`.`key` ORDER BY `__table1`.`key` ASC LIMIT _CAST(700000, 
'UInt64'), _CAST(10, 'UInt64') (stage: WithMergeableState) -SELECT `__table1`.`key` AS `key`, `__table1`.`value` AS `value`, `__table3`.`key` AS `r.key`, `__table3`.`value` AS `r.value` FROM (SELECT `__table2`.`key` AS `key`, `__table2`.`value` AS `value` FROM `default`.`num_1` AS `__table2`) AS `__table1` GLOBAL ALL INNER JOIN `_data_` AS `__table3` ON `__table1`.`key` = `__table3`.`key` ORDER BY `__table1`.`key` ASC LIMIT _CAST(700000, 'UInt64'), _CAST(10, 'UInt64') (stage: WithMergeableState) - DefaultCoordinator: Coordination done - -simple (local) join with analyzer and parallel replicas -4200000 4200000 4200000 -1400000 -4200006 4200006 4200006 -1400002 -4200012 4200012 4200012 -1400004 -4200018 4200018 4200018 -1400006 -4200024 4200024 4200024 -1400008 -4200030 4200030 4200030 -1400010 -4200036 4200036 4200036 -1400012 -4200042 4200042 4200042 -1400014 -4200048 4200048 4200048 -1400016 -4200054 4200054 4200054 -1400018 -SELECT `__table1`.`key` AS `key`, `__table1`.`value` AS `value`, `__table3`.`key` AS `r.key`, `__table3`.`value` AS `r.value` FROM (SELECT `__table2`.`key` AS `key`, `__table2`.`value` AS `value` FROM `default`.`num_1` AS `__table2`) AS `__table1` ALL INNER JOIN (SELECT `__table4`.`key` AS `key`, `__table4`.`value` AS `value` FROM `default`.`num_2` AS `__table4`) AS `__table3` ON `__table1`.`key` = `__table3`.`key` ORDER BY `__table1`.`key` ASC LIMIT _CAST(700000, 'UInt64'), _CAST(10, 'UInt64') (stage: WithMergeableState) -SELECT `__table1`.`key` AS `key`, `__table1`.`value` AS `value`, `__table3`.`key` AS `r.key`, `__table3`.`value` AS `r.value` FROM (SELECT `__table2`.`key` AS `key`, `__table2`.`value` AS `value` FROM `default`.`num_1` AS `__table2`) AS `__table1` ALL INNER JOIN (SELECT `__table4`.`key` AS `key`, `__table4`.`value` AS `value` FROM `default`.`num_2` AS `__table4`) AS `__table3` ON `__table1`.`key` = `__table3`.`key` ORDER BY `__table1`.`key` ASC LIMIT _CAST(700000, 'UInt64'), _CAST(10, 'UInt64') (stage: WithMergeableState) - DefaultCoordinator: Coordination done - -simple (local) join with analyzer and parallel replicas and full sorting merge join -4200000 4200000 4200000 -1400000 -4200006 4200006 4200006 -1400002 -4200012 4200012 4200012 -1400004 -4200018 4200018 4200018 -1400006 -4200024 4200024 4200024 -1400008 -4200030 4200030 4200030 -1400010 -4200036 4200036 4200036 -1400012 -4200042 4200042 4200042 -1400014 -4200048 4200048 4200048 -1400016 -4200054 4200054 4200054 -1400018 -SELECT `__table1`.`key` AS `key`, `__table1`.`value` AS `value`, `__table3`.`key` AS `r.key`, `__table3`.`value` AS `r.value` FROM (SELECT `__table2`.`key` AS `key`, `__table2`.`value` AS `value` FROM `default`.`num_1` AS `__table2`) AS `__table1` ALL INNER JOIN (SELECT `__table4`.`key` AS `key`, `__table4`.`value` AS `value` FROM `default`.`num_2` AS `__table4`) AS `__table3` ON `__table1`.`key` = `__table3`.`key` ORDER BY `__table1`.`key` ASC LIMIT _CAST(700000, 'UInt64'), _CAST(10, 'UInt64') (stage: WithMergeableState) -SELECT `__table1`.`key` AS `key`, `__table1`.`value` AS `value`, `__table3`.`key` AS `r.key`, `__table3`.`value` AS `r.value` FROM (SELECT `__table2`.`key` AS `key`, `__table2`.`value` AS `value` FROM `default`.`num_1` AS `__table2`) AS `__table1` ALL INNER JOIN (SELECT `__table4`.`key` AS `key`, `__table4`.`value` AS `value` FROM `default`.`num_2` AS `__table4`) AS `__table3` ON `__table1`.`key` = `__table3`.`key` ORDER BY `__table1`.`key` ASC LIMIT _CAST(700000, 'UInt64'), _CAST(10, 'UInt64') (stage: WithMergeableState) - WithOrderCoordinator: 
Coordination done - -nested join with analyzer -420000 420000 420000 -140000 -420042 420042 420042 -140014 -420084 420084 420084 -140028 -420126 420126 420126 -140042 -420168 420168 420168 -140056 -420210 420210 420210 -140070 -420252 420252 420252 -140084 -420294 420294 420294 -140098 -420336 420336 420336 -140112 -420378 420378 420378 -140126 - -nested join with analyzer and parallel replicas, both local -420000 420000 420000 -140000 -420042 420042 420042 -140014 -420084 420084 420084 -140028 -420126 420126 420126 -140042 -420168 420168 420168 -140056 -420210 420210 420210 -140070 -420252 420252 420252 -140084 -420294 420294 420294 -140098 -420336 420336 420336 -140112 -420378 420378 420378 -140126 -SELECT `__table1`.`key` AS `key`, `__table1`.`value` AS `value`, `__table3`.`key` AS `r.key`, `__table3`.`value` AS `r.value` FROM (SELECT `__table2`.`key` AS `key`, `__table2`.`value` AS `value` FROM `default`.`num_1` AS `__table2`) AS `__table1` ALL INNER JOIN (SELECT `__table4`.`key` AS `key`, `__table4`.`value` AS `value` FROM `default`.`num_2` AS `__table4` ALL INNER JOIN (SELECT `__table6`.`number` * 7 AS `key` FROM numbers(100000.) AS `__table6`) AS `__table5` ON `__table4`.`key` = `__table5`.`key` SETTINGS parallel_replicas_prefer_local_join = 1) AS `__table3` ON `__table1`.`key` = `__table3`.`key` ORDER BY `__table1`.`key` ASC LIMIT _CAST(10000, 'UInt64'), _CAST(10, 'UInt64') (stage: WithMergeableState) -SELECT `__table1`.`key` AS `key`, `__table1`.`value` AS `value`, `__table3`.`key` AS `r.key`, `__table3`.`value` AS `r.value` FROM (SELECT `__table2`.`key` AS `key`, `__table2`.`value` AS `value` FROM `default`.`num_1` AS `__table2`) AS `__table1` ALL INNER JOIN (SELECT `__table4`.`key` AS `key`, `__table4`.`value` AS `value` FROM `default`.`num_2` AS `__table4` ALL INNER JOIN (SELECT `__table6`.`number` * 7 AS `key` FROM numbers(100000.) 
AS `__table6`) AS `__table5` ON `__table4`.`key` = `__table5`.`key` SETTINGS parallel_replicas_prefer_local_join = 1) AS `__table3` ON `__table1`.`key` = `__table3`.`key` ORDER BY `__table1`.`key` ASC LIMIT _CAST(10000, 'UInt64'), _CAST(10, 'UInt64') (stage: WithMergeableState) - WithOrderCoordinator: Coordination done - nested join with analyzer and parallel replicas, both global 420000 420000 420000 -140000 420042 420042 420042 -140014 diff --git a/tests/queries/0_stateless/02967_parallel_replicas_join_algo_and_analyzer.sh b/tests/queries/0_stateless/02967_parallel_replicas_join_algo_and_analyzer_3.sh similarity index 58% rename from tests/queries/0_stateless/02967_parallel_replicas_join_algo_and_analyzer.sh rename to tests/queries/0_stateless/02967_parallel_replicas_join_algo_and_analyzer_3.sh index 2840482da6d..e49a340ab67 100755 --- a/tests/queries/0_stateless/02967_parallel_replicas_join_algo_and_analyzer.sh +++ b/tests/queries/0_stateless/02967_parallel_replicas_join_algo_and_analyzer_3.sh @@ -17,125 +17,6 @@ insert into num_1 select number * 2, toString(number * 2) from numbers(1e7); insert into num_2 select number * 3, -number from numbers(1.5e6); " -############## -echo -echo "simple join with analyzer" - -$CLICKHOUSE_CLIENT -q " -select * from (select key, value from num_1) l -inner join (select key, value from num_2) r on l.key = r.key -order by l.key limit 10 offset 700000 -SETTINGS allow_experimental_analyzer=1" - -############## -echo -echo "simple (global) join with analyzer and parallel replicas" - -$CLICKHOUSE_CLIENT -q " -select * from (select key, value from num_1) l -inner join (select key, value from num_2) r on l.key = r.key -order by l.key limit 10 offset 700000 -SETTINGS allow_experimental_analyzer=1, allow_experimental_parallel_reading_from_replicas = 2, -max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, -cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', parallel_replicas_prefer_local_join=0" - -$CLICKHOUSE_CLIENT -q " -select * from (select key, value from num_1) l -inner join (select key, value from num_2) r on l.key = r.key -order by l.key limit 10 offset 700000 -SETTINGS allow_experimental_analyzer=1, allow_experimental_parallel_reading_from_replicas = 2, send_logs_level='trace', -max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, -cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', parallel_replicas_prefer_local_join=0" 2>&1 | -grep "executeQuery\|.*Coordinator: Coordination done" | -grep -o "SELECT.*WithMergeableState)\|.*Coordinator: Coordination done" | -sed -re 's/_data_[[:digit:]]+_[[:digit:]]+/_data_/g' - -############## -echo -echo "simple (local) join with analyzer and parallel replicas" - -$CLICKHOUSE_CLIENT -q " -select * from (select key, value from num_1) l -inner join (select key, value from num_2) r on l.key = r.key -order by l.key limit 10 offset 700000 -SETTINGS allow_experimental_analyzer=1, -allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, -cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', parallel_replicas_prefer_local_join=1" - -$CLICKHOUSE_CLIENT -q " -select * from (select key, value from num_1) l -inner join (select key, value from num_2) r on l.key = r.key -order by l.key limit 10 offset 700000 -SETTINGS allow_experimental_analyzer=1, send_logs_level='trace', 
-allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, -cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', parallel_replicas_prefer_local_join=1" 2>&1 | -grep "executeQuery\|.*Coordinator: Coordination done" | -grep -o "SELECT.*WithMergeableState)\|.*Coordinator: Coordination done" | -sed -re 's/_data_[[:digit:]]+_[[:digit:]]+/_data_/g' - - -############## -echo -echo "simple (local) join with analyzer and parallel replicas and full sorting merge join" - -$CLICKHOUSE_CLIENT -q " -select * from (select key, value from num_1) l -inner join (select key, value from num_2) r on l.key = r.key -order by l.key limit 10 offset 700000 -SETTINGS allow_experimental_analyzer=1, join_algorithm='full_sorting_merge', -allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, -cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', parallel_replicas_prefer_local_join=1" - -$CLICKHOUSE_CLIENT -q " -select * from (select key, value from num_1) l -inner join (select key, value from num_2) r on l.key = r.key -order by l.key limit 10 offset 700000 -SETTINGS allow_experimental_analyzer=1, join_algorithm='full_sorting_merge', send_logs_level='trace', -allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, -cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', parallel_replicas_prefer_local_join=1" 2>&1 | -grep "executeQuery\|.*Coordinator: Coordination done" | -grep -o "SELECT.*WithMergeableState)\|.*Coordinator: Coordination done" | -sed -re 's/_data_[[:digit:]]+_[[:digit:]]+/_data_/g' - - -############## -echo -echo "nested join with analyzer" - -$CLICKHOUSE_CLIENT -q " -select * from (select key, value from num_1) l -inner join (select key, value from num_2 inner join - (select number * 7 as key from numbers(1e5)) as nn on num_2.key = nn.key settings parallel_replicas_prefer_local_join=1) r -on l.key = r.key order by l.key limit 10 offset 10000 -SETTINGS allow_experimental_analyzer=1" - - -############## -echo -echo "nested join with analyzer and parallel replicas, both local" - -$CLICKHOUSE_CLIENT -q " -select * from (select key, value from num_1) l -inner join (select key, value from num_2 inner join - (select number * 7 as key from numbers(1e5)) as nn on num_2.key = nn.key settings parallel_replicas_prefer_local_join=1) r -on l.key = r.key order by l.key limit 10 offset 10000 -SETTINGS allow_experimental_analyzer=1, -allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, -cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', parallel_replicas_prefer_local_join=1" - -$CLICKHOUSE_CLIENT -q " -select * from (select key, value from num_1) l -inner join (select key, value from num_2 inner join - (select number * 7 as key from numbers(1e5)) as nn on num_2.key = nn.key settings parallel_replicas_prefer_local_join=1) r -on l.key = r.key order by l.key limit 10 offset 10000 -SETTINGS allow_experimental_analyzer=1, join_algorithm='full_sorting_merge', send_logs_level='trace', -allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, -cluster_for_parallel_replicas = 
'test_cluster_one_shard_three_replicas_localhost', parallel_replicas_prefer_local_join=1" 2>&1 | -grep "executeQuery\|.*Coordinator: Coordination done" | -grep -o "SELECT.*WithMergeableState)\|.*Coordinator: Coordination done" | -sed -re 's/_data_[[:digit:]]+_[[:digit:]]+/_data_/g' - - ############## echo echo "nested join with analyzer and parallel replicas, both global" From aec7848525d2d6bd1cc9e7c573c25bd3b4ac79e7 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Fri, 26 Jul 2024 13:04:44 +0000 Subject: [PATCH 261/661] fix --- src/Processors/Transforms/WindowTransform.cpp | 2 +- .../03210_lag_lead_inframe_types.reference | 20 +++++++++++++++++++ .../03210_lag_lead_inframe_types.sql | 4 ++-- 3 files changed, 23 insertions(+), 3 deletions(-) diff --git a/src/Processors/Transforms/WindowTransform.cpp b/src/Processors/Transforms/WindowTransform.cpp index 61be0c32a7d..1eac08780e9 100644 --- a/src/Processors/Transforms/WindowTransform.cpp +++ b/src/Processors/Transforms/WindowTransform.cpp @@ -2424,7 +2424,7 @@ struct WindowFunctionLagLeadInFrame final : public WindowFunction argument_types[0]->getName(), argument_types[2]->getName()); } - + const auto from_name = argument_types[2]->getName(); const auto to_name = argument_types[0]->getName(); ColumnsWithTypeAndName arguments diff --git a/tests/queries/0_stateless/03210_lag_lead_inframe_types.reference b/tests/queries/0_stateless/03210_lag_lead_inframe_types.reference index cc3b9a096b9..d4734a85e72 100644 --- a/tests/queries/0_stateless/03210_lag_lead_inframe_types.reference +++ b/tests/queries/0_stateless/03210_lag_lead_inframe_types.reference @@ -18,3 +18,23 @@ 7 8 9 +0 +1 +2 +2 +2 +2 +2 +2 +2 +2 +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 diff --git a/tests/queries/0_stateless/03210_lag_lead_inframe_types.sql b/tests/queries/0_stateless/03210_lag_lead_inframe_types.sql index 5466cfe0fad..f6017ee6690 100644 --- a/tests/queries/0_stateless/03210_lag_lead_inframe_types.sql +++ b/tests/queries/0_stateless/03210_lag_lead_inframe_types.sql @@ -1,4 +1,4 @@ -SELECT lagInFrame(2::UInt128, 2, number) OVER w FROM numbers(10) WINDOW w AS (ORDER BY number); -- { serverError BAD_ARGUMENTS } -SELECT leadInFrame(2::UInt128, 2, number) OVER w FROM numbers(10) WINDOW w AS (ORDER BY number); -- { serverError BAD_ARGUMENTS } +SELECT lagInFrame(2::UInt128, 2, number) OVER w FROM numbers(10) WINDOW w AS (ORDER BY number); +SELECT leadInFrame(2::UInt128, 2, number) OVER w FROM numbers(10) WINDOW w AS (ORDER BY number); SELECT lagInFrame(2::UInt64, 2, number) OVER w FROM numbers(10) WINDOW w AS (ORDER BY number); SELECT leadInFrame(2::UInt64, 2, number) OVER w FROM numbers(10) WINDOW w AS (ORDER BY number); From d2b3be2fb8345436422e6214f7652545696be6ea Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Fri, 26 Jul 2024 15:05:03 +0200 Subject: [PATCH 262/661] Apply optimizations for a single file --- src/CMakeLists.txt | 3 + src/Client/ClientBase.cpp | 163 ---------------------- src/Client/ClientBaseOptimizedParts.cpp | 178 ++++++++++++++++++++++++ 3 files changed, 181 insertions(+), 163 deletions(-) create mode 100644 src/Client/ClientBaseOptimizedParts.cpp diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 0f84dd35320..8c133971785 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -226,6 +226,9 @@ add_object_library(clickhouse_storages_windowview Storages/WindowView) add_object_library(clickhouse_storages_s3queue Storages/ObjectStorageQueue) add_object_library(clickhouse_storages_materializedview Storages/MaterializedView) 
add_object_library(clickhouse_client Client) +# Always compile this file with the highest possible level of optimizations, even in Debug builds. +# https://github.com/ClickHouse/ClickHouse/issues/65745 +set_source_files_properties(Client/ClientBaseOptimizedParts.cpp PROPERTIES COMPILE_FLAGS "-O3") add_object_library(clickhouse_bridge BridgeHelper) add_object_library(clickhouse_server Server) add_object_library(clickhouse_server_http Server/HTTP) diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 1e1917e1ca1..04af9db7afe 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -108,7 +108,6 @@ namespace ErrorCodes extern const int UNEXPECTED_PACKET_FROM_SERVER; extern const int INVALID_USAGE_OF_INPUT; extern const int CANNOT_SET_SIGNAL_HANDLER; - extern const int UNRECOGNIZED_ARGUMENTS; extern const int LOGICAL_ERROR; extern const int CANNOT_OPEN_FILE; extern const int FILE_ALREADY_EXISTS; @@ -2848,168 +2847,6 @@ void ClientBase::showClientVersion() output_stream << VERSION_NAME << " " + getName() + " version " << VERSION_STRING << VERSION_OFFICIAL << "." << std::endl; } -namespace -{ - -/// Define transparent hash to we can use -/// std::string_view with the containers -struct TransparentStringHash -{ - using is_transparent = void; - size_t operator()(std::string_view txt) const - { - return std::hash{}(txt); - } -}; - -/* - * This functor is used to parse command line arguments and replace dashes with underscores, - * allowing options to be specified using either dashes or underscores. - */ -class OptionsAliasParser -{ -public: - explicit OptionsAliasParser(const boost::program_options::options_description& options) - { - options_names.reserve(options.options().size()); - for (const auto& option : options.options()) - options_names.insert(option->long_name()); - } - - /* - * Parses arguments by replacing dashes with underscores, and matches the resulting name with known options - * Implements boost::program_options::ext_parser logic - */ - std::pair operator()(const std::string & token) const - { - if (!token.starts_with("--")) - return {}; - std::string arg = token.substr(2); - - // divide token by '=' to separate key and value if options style=long_allow_adjacent - auto pos_eq = arg.find('='); - std::string key = arg.substr(0, pos_eq); - - if (options_names.contains(key)) - // option does not require any changes, because it is already correct - return {}; - - std::replace(key.begin(), key.end(), '-', '_'); - if (!options_names.contains(key)) - // after replacing '-' with '_' argument is still unknown - return {}; - - std::string value; - if (pos_eq != std::string::npos && pos_eq < arg.size()) - value = arg.substr(pos_eq + 1); - - return {key, value}; - } - -private: - std::unordered_set options_names; -}; - -} - -/// Enable optimizations even in debug builds because otherwise options parsing becomes extremely slow affecting .sh tests -#if defined(__clang__) -#pragma clang optimize on -#endif -void ClientBase::parseAndCheckOptions(OptionsDescription & options_description, po::variables_map & options, Arguments & arguments) -{ - if (allow_repeated_settings) - addProgramOptionsAsMultitokens(cmd_settings, options_description.main_description.value()); - else - addProgramOptions(cmd_settings, options_description.main_description.value()); - - if (allow_merge_tree_settings) - { - /// Add merge tree settings manually, because names of some settings - /// may clash. Query settings have higher priority and we just - /// skip ambiguous merge tree settings. 
- auto & main_options = options_description.main_description.value(); - - std::unordered_set> main_option_names; - for (const auto & option : main_options.options()) - main_option_names.insert(option->long_name()); - - for (const auto & setting : cmd_merge_tree_settings.all()) - { - const auto add_setting = [&](const std::string_view name) - { - if (auto it = main_option_names.find(name); it != main_option_names.end()) - return; - - if (allow_repeated_settings) - addProgramOptionAsMultitoken(cmd_merge_tree_settings, main_options, name, setting); - else - addProgramOption(cmd_merge_tree_settings, main_options, name, setting); - }; - - const auto & setting_name = setting.getName(); - - add_setting(setting_name); - - const auto & settings_to_aliases = MergeTreeSettings::Traits::settingsToAliases(); - if (auto it = settings_to_aliases.find(setting_name); it != settings_to_aliases.end()) - { - for (const auto alias : it->second) - { - add_setting(alias); - } - } - } - } - - /// Parse main commandline options. - auto parser = po::command_line_parser(arguments) - .options(options_description.main_description.value()) - .extra_parser(OptionsAliasParser(options_description.main_description.value())) - .allow_unregistered(); - po::parsed_options parsed = parser.run(); - - /// Check unrecognized options without positional options. - auto unrecognized_options = po::collect_unrecognized(parsed.options, po::collect_unrecognized_mode::exclude_positional); - if (!unrecognized_options.empty()) - { - auto hints = this->getHints(unrecognized_options[0]); - if (!hints.empty()) - throw Exception(ErrorCodes::UNRECOGNIZED_ARGUMENTS, "Unrecognized option '{}'. Maybe you meant {}", - unrecognized_options[0], toString(hints)); - - throw Exception(ErrorCodes::UNRECOGNIZED_ARGUMENTS, "Unrecognized option '{}'", unrecognized_options[0]); - } - - /// Check positional options. - for (const auto & op : parsed.options) - { - if (!op.unregistered && op.string_key.empty() && !op.original_tokens[0].starts_with("--") - && !op.original_tokens[0].empty() && !op.value.empty()) - { - /// Two special cases for better usability: - /// - if the option contains a whitespace, it might be a query: clickhouse "SELECT 1" - /// These are relevant for interactive usage - user-friendly, but questionable in general. - /// In case of ambiguity or for scripts, prefer using proper options. - - const auto & token = op.original_tokens[0]; - po::variable_value value(boost::any(op.value), false); - - const char * option; - if (token.contains(' ')) - option = "query"; - else - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Positional option `{}` is not supported.", token); - - if (!options.emplace(option, value).second) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Positional option `{}` is not supported.", token); - } - } - - po::store(parsed, options); -} - - void ClientBase::init(int argc, char ** argv) { namespace po = boost::program_options; diff --git a/src/Client/ClientBaseOptimizedParts.cpp b/src/Client/ClientBaseOptimizedParts.cpp new file mode 100644 index 00000000000..31614d301b6 --- /dev/null +++ b/src/Client/ClientBaseOptimizedParts.cpp @@ -0,0 +1,178 @@ +#include + +#include + +namespace DB +{ + +/** + * Program ptions parsing is very slow in debug builds and it affects .sh tests + * causing them to timeout sporadically. + * It seems impossible to enable optimizations for a single function (only to disable them), so + * instead we extract the code to a separate source file and compile it with different options. 
+ */ + +/// +namespace ErrorCodes +{ + extern const int UNRECOGNIZED_ARGUMENTS; +} + +namespace +{ + +/// Define transparent hash to we can use +/// std::string_view with the containers +struct TransparentStringHash +{ + using is_transparent = void; + size_t operator()(std::string_view txt) const + { + return std::hash{}(txt); + } +}; + +/* + * This functor is used to parse command line arguments and replace dashes with underscores, + * allowing options to be specified using either dashes or underscores. + */ +class OptionsAliasParser +{ +public: + explicit OptionsAliasParser(const boost::program_options::options_description& options) + { + options_names.reserve(options.options().size()); + for (const auto& option : options.options()) + options_names.insert(option->long_name()); + } + + /* + * Parses arguments by replacing dashes with underscores, and matches the resulting name with known options + * Implements boost::program_options::ext_parser logic + */ + std::pair operator()(const std::string & token) const + { + if (!token.starts_with("--")) + return {}; + std::string arg = token.substr(2); + + // divide token by '=' to separate key and value if options style=long_allow_adjacent + auto pos_eq = arg.find('='); + std::string key = arg.substr(0, pos_eq); + + if (options_names.contains(key)) + // option does not require any changes, because it is already correct + return {}; + + std::replace(key.begin(), key.end(), '-', '_'); + if (!options_names.contains(key)) + // after replacing '-' with '_' argument is still unknown + return {}; + + std::string value; + if (pos_eq != std::string::npos && pos_eq < arg.size()) + value = arg.substr(pos_eq + 1); + + return {key, value}; + } + +private: + std::unordered_set options_names; +}; + +} + +void ClientBase::parseAndCheckOptions(OptionsDescription & options_description, po::variables_map & options, Arguments & arguments) +{ + if (allow_repeated_settings) + addProgramOptionsAsMultitokens(cmd_settings, options_description.main_description.value()); + else + addProgramOptions(cmd_settings, options_description.main_description.value()); + + if (allow_merge_tree_settings) + { + /// Add merge tree settings manually, because names of some settings + /// may clash. Query settings have higher priority and we just + /// skip ambiguous merge tree settings. + auto & main_options = options_description.main_description.value(); + + std::unordered_set> main_option_names; + for (const auto & option : main_options.options()) + main_option_names.insert(option->long_name()); + + for (const auto & setting : cmd_merge_tree_settings.all()) + { + const auto add_setting = [&](const std::string_view name) + { + if (auto it = main_option_names.find(name); it != main_option_names.end()) + return; + + if (allow_repeated_settings) + addProgramOptionAsMultitoken(cmd_merge_tree_settings, main_options, name, setting); + else + addProgramOption(cmd_merge_tree_settings, main_options, name, setting); + }; + + const auto & setting_name = setting.getName(); + + add_setting(setting_name); + + const auto & settings_to_aliases = MergeTreeSettings::Traits::settingsToAliases(); + if (auto it = settings_to_aliases.find(setting_name); it != settings_to_aliases.end()) + { + for (const auto alias : it->second) + { + add_setting(alias); + } + } + } + } + + /// Parse main commandline options. 
+ auto parser = po::command_line_parser(arguments) + .options(options_description.main_description.value()) + .extra_parser(OptionsAliasParser(options_description.main_description.value())) + .allow_unregistered(); + po::parsed_options parsed = parser.run(); + + /// Check unrecognized options without positional options. + auto unrecognized_options = po::collect_unrecognized(parsed.options, po::collect_unrecognized_mode::exclude_positional); + if (!unrecognized_options.empty()) + { + auto hints = this->getHints(unrecognized_options[0]); + if (!hints.empty()) + throw Exception(ErrorCodes::UNRECOGNIZED_ARGUMENTS, "Unrecognized option '{}'. Maybe you meant {}", + unrecognized_options[0], toString(hints)); + + throw Exception(ErrorCodes::UNRECOGNIZED_ARGUMENTS, "Unrecognized option '{}'", unrecognized_options[0]); + } + + /// Check positional options. + for (const auto & op : parsed.options) + { + if (!op.unregistered && op.string_key.empty() && !op.original_tokens[0].starts_with("--") + && !op.original_tokens[0].empty() && !op.value.empty()) + { + /// Two special cases for better usability: + /// - if the option contains a whitespace, it might be a query: clickhouse "SELECT 1" + /// These are relevant for interactive usage - user-friendly, but questionable in general. + /// In case of ambiguity or for scripts, prefer using proper options. + + const auto & token = op.original_tokens[0]; + po::variable_value value(boost::any(op.value), false); + + const char * option; + if (token.contains(' ')) + option = "query"; + else + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Positional option `{}` is not supported.", token); + + if (!options.emplace(option, value).second) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Positional option `{}` is not supported.", token); + } + } + + po::store(parsed, options); +} + +} From 42384af0ef38dd326337e8cf18327871924f7359 Mon Sep 17 00:00:00 2001 From: Pablo Marcos Date: Thu, 25 Jul 2024 15:42:50 +0000 Subject: [PATCH 263/661] Fix crash when the connection is empty --- src/Storages/Distributed/DistributedAsyncInsertBatch.cpp | 6 ++++++ .../Distributed/DistributedAsyncInsertDirectoryQueue.cpp | 3 +++ 2 files changed, 9 insertions(+) diff --git a/src/Storages/Distributed/DistributedAsyncInsertBatch.cpp b/src/Storages/Distributed/DistributedAsyncInsertBatch.cpp index e1facec5b40..31779a32c1f 100644 --- a/src/Storages/Distributed/DistributedAsyncInsertBatch.cpp +++ b/src/Storages/Distributed/DistributedAsyncInsertBatch.cpp @@ -245,6 +245,9 @@ void DistributedAsyncInsertBatch::sendBatch(const SettingsChanges & settings_cha connection = std::move(result.front().entry); compression_expected = connection->getCompression() == Protocol::Compression::Enable; + if (connection.isNull()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Empty connection"); + LOG_DEBUG(parent.log, "Sending a batch of {} files to {} ({} rows, {} bytes).", files.size(), connection->getDescription(), @@ -303,6 +306,9 @@ void DistributedAsyncInsertBatch::sendSeparateFiles(const SettingsChanges & sett auto connection = std::move(result.front().entry); bool compression_expected = connection->getCompression() == Protocol::Compression::Enable; + if (connection.isNull()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Empty connection"); + RemoteInserter remote(*connection, timeouts, distributed_header.insert_query, insert_settings, diff --git a/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.cpp b/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.cpp index d471c67553d..15998776d27 
100644 --- a/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.cpp +++ b/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.cpp @@ -415,6 +415,9 @@ void DistributedAsyncInsertDirectoryQueue::processFile(std::string & file_path, auto result = pool->getManyCheckedForInsert(timeouts, insert_settings, PoolMode::GET_ONE, storage.remote_storage.getQualifiedName()); auto connection = std::move(result.front().entry); + if (connection.isNull()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Empty connection"); + LOG_DEBUG(log, "Sending `{}` to {} ({} rows, {} bytes)", file_path, connection->getDescription(), From 031b435e3ad35a57d82ff98ad4e6f79d47d1cbc3 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Fri, 26 Jul 2024 15:21:11 +0200 Subject: [PATCH 264/661] Style --- src/Client/ClientBaseOptimizedParts.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/Client/ClientBaseOptimizedParts.cpp b/src/Client/ClientBaseOptimizedParts.cpp index 31614d301b6..421843a0e79 100644 --- a/src/Client/ClientBaseOptimizedParts.cpp +++ b/src/Client/ClientBaseOptimizedParts.cpp @@ -1,20 +1,18 @@ #include - #include namespace DB { /** - * Program ptions parsing is very slow in debug builds and it affects .sh tests + * Program options parsing is very slow in debug builds and it affects .sh tests * causing them to timeout sporadically. * It seems impossible to enable optimizations for a single function (only to disable them), so * instead we extract the code to a separate source file and compile it with different options. */ - -/// namespace ErrorCodes { + extern const int BAD_ARGUMENTS; extern const int UNRECOGNIZED_ARGUMENTS; } From 414ebf035d9e2f47c16ee93d7ff0d21fbee89bff Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 26 Jul 2024 15:32:05 +0200 Subject: [PATCH 265/661] Fix error --- src/IO/ReadWriteBufferFromHTTP.cpp | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/src/IO/ReadWriteBufferFromHTTP.cpp b/src/IO/ReadWriteBufferFromHTTP.cpp index a62f22d4bd9..4b2e6580f9b 100644 --- a/src/IO/ReadWriteBufferFromHTTP.cpp +++ b/src/IO/ReadWriteBufferFromHTTP.cpp @@ -140,6 +140,10 @@ std::optional ReadWriteBufferFromHTTP::tryGetFileSize() { return std::nullopt; } + catch (const Poco::IOException &) + { + return std::nullopt; + } } return file_info->file_size; @@ -324,12 +328,12 @@ void ReadWriteBufferFromHTTP::doWithRetries(std::function && callable, error_message = e.displayText(); exception = std::current_exception(); } - catch (DB::NetException & e) + catch (NetException & e) { error_message = e.displayText(); exception = std::current_exception(); } - catch (DB::HTTPException & e) + catch (HTTPException & e) { if (!isRetriableError(e.getHTTPStatus())) is_retriable = false; @@ -337,7 +341,7 @@ void ReadWriteBufferFromHTTP::doWithRetries(std::function && callable, error_message = e.displayText(); exception = std::current_exception(); } - catch (DB::Exception & e) + catch (Exception & e) { is_retriable = false; @@ -708,6 +712,10 @@ std::optional ReadWriteBufferFromHTTP::tryGetLastModificationTime() { return std::nullopt; } + catch (const Poco::IOException &) + { + return std::nullopt; + } } return file_info->last_modified; From 981135bfb104b5ecfa0f1da5533e3d12f6850838 Mon Sep 17 00:00:00 2001 From: Pablo Marcos Date: Fri, 26 Jul 2024 13:38:42 +0000 Subject: [PATCH 266/661] Fix style check --- src/Storages/Distributed/DistributedAsyncInsertBatch.cpp | 5 +++-- .../Distributed/DistributedAsyncInsertDirectoryQueue.cpp | 2 +- 
src/Storages/Distributed/DistributedSink.cpp | 2 ++ 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/Storages/Distributed/DistributedAsyncInsertBatch.cpp b/src/Storages/Distributed/DistributedAsyncInsertBatch.cpp index 31779a32c1f..5e7b4b979c7 100644 --- a/src/Storages/Distributed/DistributedAsyncInsertBatch.cpp +++ b/src/Storages/Distributed/DistributedAsyncInsertBatch.cpp @@ -28,6 +28,7 @@ namespace ErrorCodes extern const int TOO_MANY_PARTITIONS; extern const int DISTRIBUTED_TOO_MANY_PENDING_BYTES; extern const int ARGUMENT_OUT_OF_BOUND; + extern const int LOGICAL_ERROR; } /// Can the batch be split and send files from batch one-by-one instead? @@ -246,7 +247,7 @@ void DistributedAsyncInsertBatch::sendBatch(const SettingsChanges & settings_cha compression_expected = connection->getCompression() == Protocol::Compression::Enable; if (connection.isNull()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Empty connection"); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Got empty connection"); LOG_DEBUG(parent.log, "Sending a batch of {} files to {} ({} rows, {} bytes).", files.size(), @@ -307,7 +308,7 @@ void DistributedAsyncInsertBatch::sendSeparateFiles(const SettingsChanges & sett bool compression_expected = connection->getCompression() == Protocol::Compression::Enable; if (connection.isNull()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Empty connection"); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Got empty connection"); RemoteInserter remote(*connection, timeouts, distributed_header.insert_query, diff --git a/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.cpp b/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.cpp index 15998776d27..2bb0e720c72 100644 --- a/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.cpp +++ b/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.cpp @@ -416,7 +416,7 @@ void DistributedAsyncInsertDirectoryQueue::processFile(std::string & file_path, auto connection = std::move(result.front().entry); if (connection.isNull()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Empty connection"); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Got empty connection"); LOG_DEBUG(log, "Sending `{}` to {} ({} rows, {} bytes)", file_path, diff --git a/src/Storages/Distributed/DistributedSink.cpp b/src/Storages/Distributed/DistributedSink.cpp index b2ce62caf0a..f8bbc081e55 100644 --- a/src/Storages/Distributed/DistributedSink.cpp +++ b/src/Storages/Distributed/DistributedSink.cpp @@ -378,6 +378,8 @@ DistributedSink::runWritingJob(JobReplica & job, const Block & current_block, si /// (anyway fallback_to_stale_replicas_for_distributed_queries=true by default) auto results = shard_info.pool->getManyCheckedForInsert(timeouts, settings, PoolMode::GET_ONE, storage.remote_storage.getQualifiedName()); job.connection_entry = std::move(results.front().entry); + if (job.connection_entry.isNull()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Got empty connection"); } else { From 503dc25d1021eb1b598ac52efc0370cfd15c57c6 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Fri, 26 Jul 2024 15:53:03 +0200 Subject: [PATCH 267/661] Fix linking --- programs/odbc-bridge/tests/CMakeLists.txt | 2 +- src/CMakeLists.txt | 1 - src/Common/Exception.cpp | 6 +++--- src/Common/Logger.cpp | 12 ++++++++++++ src/Common/Logger.h | 4 ++++ src/Daemon/BaseDaemon.cpp | 2 +- src/Loggers/OwnSplitChannel.cpp | 12 ------------ src/Loggers/OwnSplitChannel.h | 4 ---- 8 files changed, 21 insertions(+), 22 deletions(-) diff --git 
a/programs/odbc-bridge/tests/CMakeLists.txt b/programs/odbc-bridge/tests/CMakeLists.txt index 2f63aed7942..f1411dbb554 100644 --- a/programs/odbc-bridge/tests/CMakeLists.txt +++ b/programs/odbc-bridge/tests/CMakeLists.txt @@ -1,2 +1,2 @@ clickhouse_add_executable (validate-odbc-connection-string validate-odbc-connection-string.cpp ../validateODBCConnectionString.cpp) -target_link_libraries (validate-odbc-connection-string PRIVATE clickhouse_common_io clickhouse_common_config loggers_no_text_log) +target_link_libraries (validate-odbc-connection-string PRIVATE clickhouse_common_io clickhouse_common_config) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index fede7d69105..0f84dd35320 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -638,7 +638,6 @@ if (ENABLE_TESTS) dbms clickhouse_common_config clickhouse_common_zookeeper - loggers hilite_comparator) if (TARGET ch_contrib::simdjson) diff --git a/src/Common/Exception.cpp b/src/Common/Exception.cpp index c4bd4fbd943..d68537513da 100644 --- a/src/Common/Exception.cpp +++ b/src/Common/Exception.cpp @@ -3,12 +3,12 @@ #include #include #include -#include #include #include #include #include #include +#include #include #include #include @@ -253,7 +253,7 @@ void Exception::setThreadFramePointers(ThreadFramePointersBase frame_pointers) static void tryLogCurrentExceptionImpl(Poco::Logger * logger, const std::string & start_of_message) { - if (!OwnSplitChannel::isLoggingEnabled()) + if (!isLoggingEnabled()) return; try @@ -271,7 +271,7 @@ static void tryLogCurrentExceptionImpl(Poco::Logger * logger, const std::string void tryLogCurrentException(const char * log_name, const std::string & start_of_message) { - if (!OwnSplitChannel::isLoggingEnabled()) + if (!isLoggingEnabled()) return; /// Under high memory pressure, new allocations throw a diff --git a/src/Common/Logger.cpp b/src/Common/Logger.cpp index c8d557bc3a3..bd848abe353 100644 --- a/src/Common/Logger.cpp +++ b/src/Common/Logger.cpp @@ -25,3 +25,15 @@ bool hasLogger(const std::string & name) { return Poco::Logger::has(name); } + +static constinit std::atomic allow_logging{true}; + +bool isLoggingEnabled() +{ + return allow_logging; +} + +void disableLogging() +{ + allow_logging = false; +} diff --git a/src/Common/Logger.h b/src/Common/Logger.h index b54ccd33e72..7471e3dff9b 100644 --- a/src/Common/Logger.h +++ b/src/Common/Logger.h @@ -64,3 +64,7 @@ LoggerRawPtr createRawLogger(const std::string & name, Poco::Channel * channel, * Otherwise, returns false. 
*/ bool hasLogger(const std::string & name); + +void disableLogging(); + +bool isLoggingEnabled(); diff --git a/src/Daemon/BaseDaemon.cpp b/src/Daemon/BaseDaemon.cpp index 366aad00376..e7ae8ea5a1d 100644 --- a/src/Daemon/BaseDaemon.cpp +++ b/src/Daemon/BaseDaemon.cpp @@ -158,7 +158,7 @@ BaseDaemon::~BaseDaemon() tryLogCurrentException(&logger()); } - OwnSplitChannel::disableLogging(); + disableLogging(); } diff --git a/src/Loggers/OwnSplitChannel.cpp b/src/Loggers/OwnSplitChannel.cpp index e29d2a1e0aa..c1594361b2c 100644 --- a/src/Loggers/OwnSplitChannel.cpp +++ b/src/Loggers/OwnSplitChannel.cpp @@ -16,18 +16,6 @@ namespace DB { -static constinit std::atomic allow_logging{true}; - -bool OwnSplitChannel::isLoggingEnabled() -{ - return allow_logging; -} - -void OwnSplitChannel::disableLogging() -{ - allow_logging = false; -} - void OwnSplitChannel::log(const Poco::Message & msg) { if (!isLoggingEnabled()) diff --git a/src/Loggers/OwnSplitChannel.h b/src/Loggers/OwnSplitChannel.h index 9de55f330be..88bb6b9ce76 100644 --- a/src/Loggers/OwnSplitChannel.h +++ b/src/Loggers/OwnSplitChannel.h @@ -39,10 +39,6 @@ public: void setLevel(const std::string & name, int level); - static void disableLogging(); - - static bool isLoggingEnabled(); - private: void logSplit(const Poco::Message & msg); void tryLogSplit(const Poco::Message & msg); From 1e12ac577a4ed4f64d4de4feb8110cd794d4ce90 Mon Sep 17 00:00:00 2001 From: serxa Date: Fri, 26 Jul 2024 14:26:37 +0000 Subject: [PATCH 268/661] Fix flaky `test_pkill_query_log` (tsan) --- tests/integration/test_crash_log/test.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tests/integration/test_crash_log/test.py b/tests/integration/test_crash_log/test.py index a5b82039a84..5a63e6ca6a7 100644 --- a/tests/integration/test_crash_log/test.py +++ b/tests/integration/test_crash_log/test.py @@ -60,6 +60,13 @@ def test_pkill(started_node): def test_pkill_query_log(started_node): + if ( + started_node.is_built_with_thread_sanitizer() + or started_node.is_built_with_address_sanitizer() + or started_node.is_built_with_memory_sanitizer() + ): + pytest.skip("doesn't fit in timeouts for stacktrace generation") + for signal in ["SEGV", "4"]: # force create query_log if it was not created started_node.query("SYSTEM FLUSH LOGS") From d8318fc428e2f5b847415886782fd8e25bca401b Mon Sep 17 00:00:00 2001 From: Konstantin Bogdanov Date: Fri, 26 Jul 2024 17:09:22 +0200 Subject: [PATCH 269/661] Wrap in retries --- ...1676_clickhouse_client_autocomplete.python | 38 +++++++++++++++---- 1 file changed, 31 insertions(+), 7 deletions(-) diff --git a/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.python b/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.python index 0f35d259c7c..fe08a07c214 100644 --- a/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.python +++ b/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.python @@ -10,18 +10,36 @@ DEBUG_LOG = os.path.join( os.path.basename(os.path.abspath(__file__)).strip(".python") + ".debuglog", ) +STATE_MAP = { + -1: "process did not start", + 0: "completion was found", + 1: "process started and said ':)'", + 2: "completion search was started", + 3: "completion is missing", +} + def run_with_timeout(func, args, timeout): - process = multiprocessing.Process(target=func, args=args) - process.start() - process.join(timeout) + for _ in range(5): + state = multiprocessing.Value("i", -1) + process = multiprocessing.Process(target=func, args=args, kwargs={"state": state}) + process.start() + 
process.join(timeout) - if process.is_alive(): - process.terminate() - print("Timeout") + if state.value in (0, 3): + return + + if process.is_alive(): + process.terminate() + + if state.value == -1: + continue + + print(f"Timeout, state: {STATE_MAP[state.value]}") + return -def test_completion(program, argv, comp_word): +def test_completion(program, argv, comp_word, state=None): comp_begin = comp_word[:-3] shell_pid, master = pty.fork() @@ -41,6 +59,8 @@ def test_completion(program, argv, comp_word): debug_log_fd.write(repr(output_b) + "\n") debug_log_fd.flush() + state.value = 1 + os.write(master, b"SET " + bytes(comp_begin.encode())) output_b = os.read(master, 4096) output = output_b.decode() @@ -55,6 +75,8 @@ def test_completion(program, argv, comp_word): time.sleep(0.01) os.write(master, b"\t") + state.value = 2 + output_b = os.read(master, 4096) output = output_b.decode() debug_log_fd.write(repr(output_b) + "\n") @@ -65,6 +87,7 @@ def test_completion(program, argv, comp_word): # meaning no concise completion is found if "\x07" in output: print(f"{comp_word}: FAIL") + state.value = 3 return output_b = os.read(master, 4096) @@ -73,6 +96,7 @@ def test_completion(program, argv, comp_word): debug_log_fd.flush() print(f"{comp_word}: OK") + state.value = 0 finally: os.close(master) debug_log_fd.close() From ff5cd2051fc8bfd609a9040ffba02697283e69af Mon Sep 17 00:00:00 2001 From: maxvostrikov Date: Fri, 26 Jul 2024 17:10:39 +0200 Subject: [PATCH 270/661] squash! added somme tests in relation with https://github.com/ClickHouse/ClickHouse/pull/54881 with new behaviour when enable_named_columns_in_function_tuple=1 (default value) --- tests/queries/0_stateless/00307_format_xml.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/00307_format_xml.sql b/tests/queries/0_stateless/00307_format_xml.sql index 22566112bc7..a7e0e628945 100644 --- a/tests/queries/0_stateless/00307_format_xml.sql +++ b/tests/queries/0_stateless/00307_format_xml.sql @@ -2,4 +2,4 @@ SET output_format_write_statistics = 0; SELECT 'unnamed columns in tuple'; SELECT 'Hello & world' AS s, 'Hello\n', toDateTime('2001-02-03 04:05:06') AS time, arrayMap(x -> toString(x), range(10)) AS arr, (s, time) AS tpl SETTINGS extremes = 1, enable_named_columns_in_function_tuple = 0 FORMAT XML; SELECT 'named columns in tuple'; -SELECT 'Hello & world' AS s, toDateTime('2001-02-03 04:05:06') AS time, (s, time) AS tpl SETTINGS extremes = 1, enable_named_columns_in_function_tuple = 0 FORMAT XML;` +SELECT 'Hello & world' AS s, toDateTime('2001-02-03 04:05:06') AS time, (s, time) AS tpl SETTINGS extremes = 1, enable_named_columns_in_function_tuple = 0 FORMAT XML; From a59036e5152aac2d44b07e0f62ab0ae1a066bb5b Mon Sep 17 00:00:00 2001 From: vdimir Date: Fri, 26 Jul 2024 15:36:15 +0000 Subject: [PATCH 271/661] chmod +x ./tests/queries/0_stateless/03204_format_join_on.sh --- tests/queries/0_stateless/03204_format_join_on.sh | 0 1 file changed, 0 insertions(+), 0 deletions(-) mode change 100644 => 100755 tests/queries/0_stateless/03204_format_join_on.sh diff --git a/tests/queries/0_stateless/03204_format_join_on.sh b/tests/queries/0_stateless/03204_format_join_on.sh old mode 100644 new mode 100755 From d42fa0690d1b6ec19755b64740d83327e71a914a Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 26 Jul 2024 15:59:23 +0000 Subject: [PATCH 272/661] Remove filterBlockWithDAG. 
--- src/Storages/MergeTree/MergeTreeData.cpp | 2 +- src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp | 2 +- src/Storages/System/StorageSystemDroppedTablesParts.cpp | 2 +- src/Storages/System/StorageSystemPartsBase.cpp | 4 ++-- src/Storages/System/StorageSystemTables.cpp | 2 +- src/Storages/VirtualColumnUtils.cpp | 7 +++---- src/Storages/VirtualColumnUtils.h | 2 +- 7 files changed, 10 insertions(+), 11 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index ecd25e3cf71..d9ab2894dc4 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -1164,7 +1164,7 @@ std::optional MergeTreeData::totalRowsByPartitionPredicateImpl( if (valid) { virtual_columns_block = getBlockWithVirtualsForFilter(metadata_snapshot, parts); - VirtualColumnUtils::filterBlockWithDAG(std::move(*filter_dag), virtual_columns_block, local_context); + VirtualColumnUtils::filterBlockWithExpression(VirtualColumnUtils::buildFilterExpression(std::move(*filter_dag), local_context), virtual_columns_block); part_values = VirtualColumnUtils::extractSingleValueFromBlock(virtual_columns_block, "_part"); if (part_values.empty()) return 0; diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index a37dbfa554c..a6ef0063069 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -488,7 +488,7 @@ std::optional> MergeTreeDataSelectExecutor::filterPar return {}; auto virtual_columns_block = data.getBlockWithVirtualsForFilter(metadata_snapshot, parts); - VirtualColumnUtils::filterBlockWithDAG(std::move(*dag), virtual_columns_block, context); + VirtualColumnUtils::filterBlockWithExpression(VirtualColumnUtils::buildFilterExpression(std::move(*dag), context), virtual_columns_block); return VirtualColumnUtils::extractSingleValueFromBlock(virtual_columns_block, "_part"); } diff --git a/src/Storages/System/StorageSystemDroppedTablesParts.cpp b/src/Storages/System/StorageSystemDroppedTablesParts.cpp index defc4ec2d2a..c2601b8ebe3 100644 --- a/src/Storages/System/StorageSystemDroppedTablesParts.cpp +++ b/src/Storages/System/StorageSystemDroppedTablesParts.cpp @@ -75,7 +75,7 @@ StoragesDroppedInfoStream::StoragesDroppedInfoStream(std::optional f { /// Filter block_to_filter with columns 'database', 'table', 'engine', 'active'. if (filter) - VirtualColumnUtils::filterBlockWithDAG(std::move(*filter), block_to_filter, context); + VirtualColumnUtils::filterBlockWithExpression(VirtualColumnUtils::buildFilterExpression(std::move(*filter), context), block_to_filter); rows = block_to_filter.rows(); } diff --git a/src/Storages/System/StorageSystemPartsBase.cpp b/src/Storages/System/StorageSystemPartsBase.cpp index a0c9a5c61bd..7ace8ee24aa 100644 --- a/src/Storages/System/StorageSystemPartsBase.cpp +++ b/src/Storages/System/StorageSystemPartsBase.cpp @@ -124,7 +124,7 @@ StoragesInfoStream::StoragesInfoStream(std::optional filter_by_datab /// Filter block_to_filter with column 'database'. if (filter_by_database) - VirtualColumnUtils::filterBlockWithDAG(std::move(*filter_by_database), block_to_filter, context); + VirtualColumnUtils::filterBlockWithExpression(VirtualColumnUtils::buildFilterExpression(std::move(*filter_by_database), context), block_to_filter); rows = block_to_filter.rows(); /// Block contains new columns, update database_column. 
@@ -204,7 +204,7 @@ StoragesInfoStream::StoragesInfoStream(std::optional filter_by_datab { /// Filter block_to_filter with columns 'database', 'table', 'engine', 'active'. if (filter_by_other_columns) - VirtualColumnUtils::filterBlockWithDAG(std::move(*filter_by_other_columns), block_to_filter, context); + VirtualColumnUtils::filterBlockWithExpression(VirtualColumnUtils::buildFilterExpression(std::move(*filter_by_other_columns), context), block_to_filter); rows = block_to_filter.rows(); } diff --git a/src/Storages/System/StorageSystemTables.cpp b/src/Storages/System/StorageSystemTables.cpp index 9ae21ded9ba..943ce9c317a 100644 --- a/src/Storages/System/StorageSystemTables.cpp +++ b/src/Storages/System/StorageSystemTables.cpp @@ -125,7 +125,7 @@ ColumnPtr getFilteredTables( block.insert(ColumnWithTypeAndName(std::move(engine_column), std::make_shared(), "engine")); if (dag) - VirtualColumnUtils::filterBlockWithDAG(std::move(*dag), block, context); + VirtualColumnUtils::filterBlockWithExpression(VirtualColumnUtils::buildFilterExpression(std::move(*dag), context), block); return block.getByPosition(0).column; } diff --git a/src/Storages/VirtualColumnUtils.cpp b/src/Storages/VirtualColumnUtils.cpp index a25b7b5ca49..90c2c7f93c1 100644 --- a/src/Storages/VirtualColumnUtils.cpp +++ b/src/Storages/VirtualColumnUtils.cpp @@ -77,11 +77,10 @@ void buildSetsForDAG(const ActionsDAG & dag, const ContextPtr & context) } } -void filterBlockWithDAG(ActionsDAG dag, Block & block, ContextPtr context) +ExpressionActionsPtr buildFilterExpression(ActionsDAG dag, ContextPtr context) { buildSetsForDAG(dag, context); - auto actions = std::make_shared(std::move(dag)); - filterBlockWithExpression(actions, block); + return std::make_shared(std::move(dag)); } void filterBlockWithExpression(const ExpressionActionsPtr & actions, Block & block) @@ -384,7 +383,7 @@ void filterBlockWithPredicate(const ActionsDAG::Node * predicate, Block & block, { auto dag = splitFilterDagForAllowedInputs(predicate, &block, /*allow_non_deterministic_functions=*/ false); if (dag) - filterBlockWithDAG(std::move(*dag), block, context); + filterBlockWithExpression(buildFilterExpression(std::move(*dag), context), block); } } diff --git a/src/Storages/VirtualColumnUtils.h b/src/Storages/VirtualColumnUtils.h index 640f9db2fb8..73b7908b75c 100644 --- a/src/Storages/VirtualColumnUtils.h +++ b/src/Storages/VirtualColumnUtils.h @@ -23,7 +23,7 @@ namespace VirtualColumnUtils void filterBlockWithPredicate(const ActionsDAG::Node * predicate, Block & block, ContextPtr context); /// Just filters block. Block should contain all the required columns. -void filterBlockWithDAG(ActionsDAG dag, Block & block, ContextPtr context); +ExpressionActionsPtr buildFilterExpression(ActionsDAG dag, ContextPtr context); void filterBlockWithExpression(const ExpressionActionsPtr & actions, Block & block); /// Builds sets used by ActionsDAG inplace. 
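The refactoring above replaces the one-shot filterBlockWithDAG(dag, block, context) helper with a two-step pair: buildFilterExpression(dag, context) prepares the filter once (building the sets used by the DAG), and filterBlockWithExpression(actions, block) applies it, which also lets a prepared expression be reused across blocks. The sketch below illustrates that build-once/apply-many pattern in a self-contained form; the Block and FilterExpression types here are simplified stand-ins for illustration only, not the real ActionsDAG/ExpressionActions classes from the patch.

#include <functional>
#include <iostream>
#include <memory>
#include <vector>

/// Simplified stand-ins (hypothetical); the real code works on ActionsDAG, ExpressionActions and Block.
using Block = std::vector<int>;
using FilterExpression = std::function<bool(int)>;
using FilterExpressionPtr = std::shared_ptr<const FilterExpression>;

/// "Build" step, done once: the expensive part (in the real helper, this is where sets are built).
FilterExpressionPtr buildFilterExpression(int threshold)
{
    return std::make_shared<const FilterExpression>([threshold](int value) { return value >= threshold; });
}

/// "Apply" step: cheap, reusable for any number of blocks with the same prebuilt expression.
void filterBlockWithExpression(const FilterExpressionPtr & expression, Block & block)
{
    std::erase_if(block, [&](int value) { return !(*expression)(value); });
}

int main()
{
    auto expression = buildFilterExpression(10);   /// build once

    std::vector<Block> blocks = {{1, 12, 7, 20}, {9, 10, 11}};
    for (auto & block : blocks)                    /// apply to many blocks
        filterBlockWithExpression(expression, block);

    for (const auto & block : blocks)
    {
        for (int value : block)
            std::cout << value << ' ';
        std::cout << '\n';
    }
}

Callers that filter only a single block keep the old behaviour by simply composing the two calls, which is exactly what the updated call sites in the patch above do.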
From 454353215736a4c6da635e777b571be0f1bd1831 Mon Sep 17 00:00:00 2001 From: vdimir Date: Fri, 26 Jul 2024 18:33:48 +0200 Subject: [PATCH 273/661] Fix ShellCheck --- .../0_stateless/03203_client_benchmark_options.sh | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/queries/0_stateless/03203_client_benchmark_options.sh b/tests/queries/0_stateless/03203_client_benchmark_options.sh index 475309cebb9..cbbd8aab382 100755 --- a/tests/queries/0_stateless/03203_client_benchmark_options.sh +++ b/tests/queries/0_stateless/03203_client_benchmark_options.sh @@ -5,22 +5,22 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . "$CUR_DIR"/../shell_config.sh output=$(${CLICKHOUSE_CLIENT} -t -q "SELECT sleepEachRow(2) FORMAT Null" 2>&1) -echo "$output" | grep -q "^2\." && echo "Ok" || { echo "Fail"; echo "$output"; } +{ echo "$output" | grep -q "^2\." && echo "Ok"; } || { echo "Fail"; echo "$output"; } output=$(${CLICKHOUSE_CLIENT} --time -q "SELECT sleepEachRow(2) FORMAT Null" 2>&1) -echo "$output" | grep -q "^2\." && echo "Ok" || { echo "Fail"; echo "$output"; } +{ echo "$output" | grep -q "^2\." && echo "Ok"; } || { echo "Fail"; echo "$output"; } output=$(${CLICKHOUSE_CLIENT} --memory-usage -q "SELECT sum(number) FROM numbers(10_000) FORMAT Null" 2>&1) -echo "$output" | grep -q "^[0-9]\+$" && echo "Ok" || { echo "Fail"; echo "$output"; } +{ echo "$output" | grep -q "^[0-9]\+$" && echo "Ok"; } || { echo "Fail"; echo "$output"; } output=$(${CLICKHOUSE_CLIENT} --memory-usage=none -q "SELECT sum(number) FROM numbers(10_000) FORMAT Null" 2>&1) echo -n "$output" # expected no output output=$(${CLICKHOUSE_CLIENT} --memory-usage=default -q "SELECT sum(number) FROM numbers(10_000) FORMAT Null" 2>&1) -echo "$output" | grep -q "^[0-9]\+$" && echo "Ok" || { echo "Fail"; echo "$output"; } +{ echo "$output" | grep -q "^[0-9]\+$" && echo "Ok"; } || { echo "Fail"; echo "$output"; } output=$(${CLICKHOUSE_CLIENT} --memory-usage=readable -q "SELECT sum(number) FROM numbers(10_000) FORMAT Null" 2>&1) -echo "$output" | grep -q "^[0-9].*B$" && echo "Ok" || { echo "Fail"; echo "$output"; } +{ echo "$output" | grep -q "^[0-9].*B$" && echo "Ok"; } || { echo "Fail"; echo "$output"; } output=$(${CLICKHOUSE_CLIENT} --memory-usage=unknown -q "SELECT sum(number) FROM numbers(10_000) FORMAT Null" 2>&1) -echo "$output" | grep -q "BAD_ARGUMENTS" && echo "Ok" || { echo "Fail"; echo "$output"; } +{ echo "$output" | grep -q "BAD_ARGUMENTS" && echo "Ok"; } || { echo "Fail"; echo "$output"; } From d153a1cf93e157acb7fadb5ca8b4f30fbd08bad5 Mon Sep 17 00:00:00 2001 From: vdimir Date: Fri, 26 Jul 2024 18:37:30 +0200 Subject: [PATCH 274/661] add quotes --- .../0_stateless/03203_client_benchmark_options.sh | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/queries/0_stateless/03203_client_benchmark_options.sh b/tests/queries/0_stateless/03203_client_benchmark_options.sh index cbbd8aab382..37a1f2cd3ac 100755 --- a/tests/queries/0_stateless/03203_client_benchmark_options.sh +++ b/tests/queries/0_stateless/03203_client_benchmark_options.sh @@ -5,22 +5,22 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . "$CUR_DIR"/../shell_config.sh output=$(${CLICKHOUSE_CLIENT} -t -q "SELECT sleepEachRow(2) FORMAT Null" 2>&1) -{ echo "$output" | grep -q "^2\." && echo "Ok"; } || { echo "Fail"; echo "$output"; } +{ echo "$output" | grep -q "^2\." 
&& echo "Ok"; } || { echo "Fail"; echo "'$output'"; } output=$(${CLICKHOUSE_CLIENT} --time -q "SELECT sleepEachRow(2) FORMAT Null" 2>&1) -{ echo "$output" | grep -q "^2\." && echo "Ok"; } || { echo "Fail"; echo "$output"; } +{ echo "$output" | grep -q "^2\." && echo "Ok"; } || { echo "Fail"; echo "'$output'"; } output=$(${CLICKHOUSE_CLIENT} --memory-usage -q "SELECT sum(number) FROM numbers(10_000) FORMAT Null" 2>&1) -{ echo "$output" | grep -q "^[0-9]\+$" && echo "Ok"; } || { echo "Fail"; echo "$output"; } +{ echo "$output" | grep -q "^[0-9]\+$" && echo "Ok"; } || { echo "Fail"; echo "'$output'"; } output=$(${CLICKHOUSE_CLIENT} --memory-usage=none -q "SELECT sum(number) FROM numbers(10_000) FORMAT Null" 2>&1) echo -n "$output" # expected no output output=$(${CLICKHOUSE_CLIENT} --memory-usage=default -q "SELECT sum(number) FROM numbers(10_000) FORMAT Null" 2>&1) -{ echo "$output" | grep -q "^[0-9]\+$" && echo "Ok"; } || { echo "Fail"; echo "$output"; } +{ echo "$output" | grep -q "^[0-9]\+$" && echo "Ok"; } || { echo "Fail"; echo "'$output'"; } output=$(${CLICKHOUSE_CLIENT} --memory-usage=readable -q "SELECT sum(number) FROM numbers(10_000) FORMAT Null" 2>&1) -{ echo "$output" | grep -q "^[0-9].*B$" && echo "Ok"; } || { echo "Fail"; echo "$output"; } +{ echo "$output" | grep -q "^[0-9].*B$" && echo "Ok"; } || { echo "Fail"; echo "'$output'"; } output=$(${CLICKHOUSE_CLIENT} --memory-usage=unknown -q "SELECT sum(number) FROM numbers(10_000) FORMAT Null" 2>&1) -{ echo "$output" | grep -q "BAD_ARGUMENTS" && echo "Ok"; } || { echo "Fail"; echo "$output"; } +{ echo "$output" | grep -q "BAD_ARGUMENTS" && echo "Ok"; } || { echo "Fail"; echo "'$output'"; } From f276be829bebd8e704e33565127034f3e258cc31 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Fri, 26 Jul 2024 16:59:41 +0000 Subject: [PATCH 275/661] Automatic style fix --- .../0_stateless/01676_clickhouse_client_autocomplete.python | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.python b/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.python index fe08a07c214..f363cb64018 100644 --- a/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.python +++ b/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.python @@ -22,7 +22,9 @@ STATE_MAP = { def run_with_timeout(func, args, timeout): for _ in range(5): state = multiprocessing.Value("i", -1) - process = multiprocessing.Process(target=func, args=args, kwargs={"state": state}) + process = multiprocessing.Process( + target=func, args=args, kwargs={"state": state} + ) process.start() process.join(timeout) From 343f1fa4bae219f7c287cb314ed6e04feb9a0de4 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 26 Jul 2024 17:42:06 +0000 Subject: [PATCH 276/661] Check type after optimize_rewrite_aggregate_function_with_if. 
--- .../RewriteAggregateFunctionWithIfPass.cpp | 32 ++++++++++++++++--- src/Analyzer/Resolve/QueryAnalyzer.cpp | 4 ++- 2 files changed, 30 insertions(+), 6 deletions(-) diff --git a/src/Analyzer/Passes/RewriteAggregateFunctionWithIfPass.cpp b/src/Analyzer/Passes/RewriteAggregateFunctionWithIfPass.cpp index c1adf05ac76..a48e88132a6 100644 --- a/src/Analyzer/Passes/RewriteAggregateFunctionWithIfPass.cpp +++ b/src/Analyzer/Passes/RewriteAggregateFunctionWithIfPass.cpp @@ -2,6 +2,7 @@ #include #include +#include #include #include @@ -42,7 +43,7 @@ public: if (lower_name.ends_with("if")) return; - auto & function_arguments_nodes = function_node->getArguments().getNodes(); + const auto & function_arguments_nodes = function_node->getArguments().getNodes(); if (function_arguments_nodes.size() != 1) return; @@ -50,6 +51,8 @@ public: if (!if_node || if_node->getFunctionName() != "if") return; + FunctionNodePtr replaced_node; + auto if_arguments_nodes = if_node->getArguments().getNodes(); auto * first_const_node = if_arguments_nodes[1]->as(); auto * second_const_node = if_arguments_nodes[2]->as(); @@ -75,8 +78,11 @@ public: new_arguments[0] = std::move(if_arguments_nodes[1]); new_arguments[1] = std::move(if_arguments_nodes[0]); - function_arguments_nodes = std::move(new_arguments); - resolveAggregateFunctionNodeByName(*function_node, function_node->getFunctionName() + "If"); + + replaced_node = std::make_shared(function_node->getFunctionName() + "If"); + replaced_node->getArguments().getNodes() = std::move(new_arguments); + replaced_node->getParameters().getNodes() = function_node->getParameters().getNodes(); + resolveAggregateFunctionNodeByName(*replaced_node, replaced_node->getFunctionName()); } } else if (first_const_node) @@ -104,10 +110,26 @@ public: FunctionFactory::instance().get("not", getContext())->build(not_function->getArgumentColumns())); new_arguments[1] = std::move(not_function); - function_arguments_nodes = std::move(new_arguments); - resolveAggregateFunctionNodeByName(*function_node, function_node->getFunctionName() + "If"); + replaced_node = std::make_shared(function_node->getFunctionName() + "If"); + replaced_node->getArguments().getNodes() = std::move(new_arguments); + replaced_node->getParameters().getNodes() = function_node->getParameters().getNodes(); + resolveAggregateFunctionNodeByName(*replaced_node, replaced_node->getFunctionName()); } } + + if (!replaced_node) + return; + + auto prev_type = function_node->getResultType(); + auto curr_type = replaced_node->getResultType(); + if (!prev_type->equals(*curr_type)) + return; + + /// Just in case, CAST compatible aggregate function states. 
+ if (WhichDataType(prev_type).isAggregateFunction() && !DataTypeAggregateFunction::strictEquals(prev_type, curr_type)) + node = createCastFunction(std::move(replaced_node), prev_type, getContext()); + else + node = std::move(replaced_node); } }; diff --git a/src/Analyzer/Resolve/QueryAnalyzer.cpp b/src/Analyzer/Resolve/QueryAnalyzer.cpp index b1fe2554988..b1603bb18dd 100644 --- a/src/Analyzer/Resolve/QueryAnalyzer.cpp +++ b/src/Analyzer/Resolve/QueryAnalyzer.cpp @@ -3239,11 +3239,13 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi auto action = function_node_ptr->getNullsAction(); std::string aggregate_function_name = rewriteAggregateFunctionNameIfNeeded(function_name, action, scope.context); + std::cerr << "==================== " << function_name << " -> " << aggregate_function_name << std::endl; + AggregateFunctionProperties properties; auto aggregate_function = AggregateFunctionFactory::instance().get(aggregate_function_name, action, argument_types, parameters, properties); - + std::cerr << aggregate_function->getName() << ' ' << aggregate_function->getResultType()->getName() << std::endl; function_node.resolveAsAggregateFunction(std::move(aggregate_function)); return result_projection_names; From 4833b46a1a86bb1847d2520ea12ea4650c497abc Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 26 Jul 2024 17:43:30 +0000 Subject: [PATCH 277/661] Remove debug code --- src/Analyzer/Resolve/QueryAnalyzer.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/Analyzer/Resolve/QueryAnalyzer.cpp b/src/Analyzer/Resolve/QueryAnalyzer.cpp index b1603bb18dd..b1fe2554988 100644 --- a/src/Analyzer/Resolve/QueryAnalyzer.cpp +++ b/src/Analyzer/Resolve/QueryAnalyzer.cpp @@ -3239,13 +3239,11 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi auto action = function_node_ptr->getNullsAction(); std::string aggregate_function_name = rewriteAggregateFunctionNameIfNeeded(function_name, action, scope.context); - std::cerr << "==================== " << function_name << " -> " << aggregate_function_name << std::endl; - AggregateFunctionProperties properties; auto aggregate_function = AggregateFunctionFactory::instance().get(aggregate_function_name, action, argument_types, parameters, properties); - std::cerr << aggregate_function->getName() << ' ' << aggregate_function->getResultType()->getName() << std::endl; + function_node.resolveAsAggregateFunction(std::move(aggregate_function)); return result_projection_names; From 1ba44252cd20ab660d374970257a1ceb438236dd Mon Sep 17 00:00:00 2001 From: jsc0218 Date: Fri, 26 Jul 2024 18:33:50 +0000 Subject: [PATCH 278/661] turn sql to bash --- ...uery_views_log_background_thread.reference | 25 +----------- ...02572_query_views_log_background_thread.sh | 38 ++++++++++++++++++ ...2572_query_views_log_background_thread.sql | 40 ------------------- 3 files changed, 40 insertions(+), 63 deletions(-) create mode 100755 tests/queries/0_stateless/02572_query_views_log_background_thread.sh delete mode 100644 tests/queries/0_stateless/02572_query_views_log_background_thread.sql diff --git a/tests/queries/0_stateless/02572_query_views_log_background_thread.reference b/tests/queries/0_stateless/02572_query_views_log_background_thread.reference index f867fd0d085..d7f2272f5b4 100644 --- a/tests/queries/0_stateless/02572_query_views_log_background_thread.reference +++ b/tests/queries/0_stateless/02572_query_views_log_background_thread.reference @@ -1,25 +1,4 @@ --- { echoOn } -insert into 
buffer_02572 values (1); --- ensure that the flush was not direct -select * from buffer_02572; +OK +1 1 -select * from data_02572; -select * from copy_02572; --- we cannot use OPTIMIZE, this will attach query context, so let's wait -SET function_sleep_max_microseconds_per_block = 6000000; -select sleepEachRow(1) from numbers(3*2) format Null; -select sleepEachRow(1) from numbers(3*2) format Null; -system flush logs; -select count() > 0, lower(status::String), errorCodeToName(exception_code) - from system.query_views_log where - view_name = concatWithSeparator('.', currentDatabase(), 'mv_02572') and - view_target = concatWithSeparator('.', currentDatabase(), 'copy_02572') - group by 2, 3 -; 1 queryfinish OK -select * from buffer_02572; -1 -select * from data_02572; -1 -select * from copy_02572; -1 diff --git a/tests/queries/0_stateless/02572_query_views_log_background_thread.sh b/tests/queries/0_stateless/02572_query_views_log_background_thread.sh new file mode 100755 index 00000000000..a3e428e75c8 --- /dev/null +++ b/tests/queries/0_stateless/02572_query_views_log_background_thread.sh @@ -0,0 +1,38 @@ +#!/usr/bin/env bash + +# INSERT buffer_02572 -> data_02572 -> copy_02572 +# ^^ +# push to system.query_views_log + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +${CLICKHOUSE_CLIENT} --ignore-error --multiquery --query "drop table if exists buffer_02572; + drop table if exists data_02572; drop table if exists copy_02572; drop table if exists mv_02572;" + +${CLICKHOUSE_CLIENT} --query="create table copy_02572 (key Int) engine=Memory();" +${CLICKHOUSE_CLIENT} --query="create table data_02572 (key Int) engine=Memory();" +${CLICKHOUSE_CLIENT} --query="create table buffer_02572 (key Int) engine=Buffer(currentDatabase(), data_02572, 1, 3, 3, 1, 1e9, 1, 1e9);" +${CLICKHOUSE_CLIENT} --query="create materialized view mv_02572 to copy_02572 as select * from data_02572;" + +${CLICKHOUSE_CLIENT} --query="insert into buffer_02572 values (1);" + +# ensure that the flush was not direct +${CLICKHOUSE_CLIENT} --ignore-error --multiquery --query "select * from data_02572; select * from copy_02572;" + +# we cannot use OPTIMIZE, this will attach query context, so let's wait +for _ in {1..100}; do + $CLICKHOUSE_CLIENT -q "select * from data_02572;" | grep -q "1" && echo 'OK' && break + sleep 0.5 +done + + +${CLICKHOUSE_CLIENT} --ignore-error --multiquery --query "select * from data_02572; select * from copy_02572;" + +${CLICKHOUSE_CLIENT} --query="system flush logs;" +${CLICKHOUSE_CLIENT} --query="select count() > 0, lower(status::String), errorCodeToName(exception_code) + from system.query_views_log where + view_name = concatWithSeparator('.', currentDatabase(), 'mv_02572') and + view_target = concatWithSeparator('.', currentDatabase(), 'copy_02572') + group by 2, 3;" \ No newline at end of file diff --git a/tests/queries/0_stateless/02572_query_views_log_background_thread.sql b/tests/queries/0_stateless/02572_query_views_log_background_thread.sql deleted file mode 100644 index 2e9a62b71da..00000000000 --- a/tests/queries/0_stateless/02572_query_views_log_background_thread.sql +++ /dev/null @@ -1,40 +0,0 @@ --- INSERT buffer_02572 -> data_02572 -> copy_02572 --- ^^ --- push to system.query_views_log - -drop table if exists buffer_02572; -drop table if exists data_02572; -drop table if exists copy_02572; -drop table if exists mv_02572; - -create table copy_02572 (key Int) engine=Memory(); -create table data_02572 (key Int) 
engine=Memory(); -create table buffer_02572 (key Int) engine=Buffer(currentDatabase(), data_02572, 1, - /* never direct flush for flush from background thread */ - /* min_time= */ 3, 3, - 1, 1e9, - 1, 1e9); -create materialized view mv_02572 to copy_02572 as select * from data_02572; - --- { echoOn } -insert into buffer_02572 values (1); --- ensure that the flush was not direct -select * from buffer_02572; -select * from data_02572; -select * from copy_02572; --- we cannot use OPTIMIZE, this will attach query context, so let's wait -SET function_sleep_max_microseconds_per_block = 6000000; -select sleepEachRow(1) from numbers(3*2) format Null; -select sleepEachRow(1) from numbers(3*2) format Null; - -system flush logs; -select count() > 0, lower(status::String), errorCodeToName(exception_code) - from system.query_views_log where - view_name = concatWithSeparator('.', currentDatabase(), 'mv_02572') and - view_target = concatWithSeparator('.', currentDatabase(), 'copy_02572') - group by 2, 3 -; - -select * from buffer_02572; -select * from data_02572; -select * from copy_02572; \ No newline at end of file From 870ec237bb427243388acbe5bca770241eeb7fbb Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 22 Jun 2024 14:14:11 +0200 Subject: [PATCH 279/661] Add ability to load dashboards for system.dashboards from config One of the obvious reasons is to allow rendering them with a readonly user, which is not possible right now due to the usage of the merge() function there. Another one is to add some custom metrics. Note that, once set, they override the default dashboards preset. Signed-off-by: Azat Khuzhin --- programs/server/Server.cpp | 2 + programs/server/config.xml | 25 ++++++++++ src/Interpreters/Context.cpp | 49 +++++++++++++++++++ src/Interpreters/Context.h | 4 ++ .../System/StorageSystemDashboards.cpp | 26 +++++++--- src/Storages/System/StorageSystemDashboards.h | 2 +- .../test_custom_dashboards/__init__.py | 0 .../configs/config.d/overrides.xml | 15 ++++++ .../test_custom_dashboards/test.py | 35 +++++++++++++ 9 files changed, 149 insertions(+), 9 deletions(-) create mode 100644 tests/integration/test_custom_dashboards/__init__.py create mode 100644 tests/integration/test_custom_dashboards/configs/config.d/overrides.xml create mode 100644 tests/integration/test_custom_dashboards/test.py diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 16888015f8b..f8aea3ad10c 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -1582,6 +1582,8 @@ try global_context->setMacros(std::make_unique(*config, "macros", log)); global_context->setExternalAuthenticatorsConfig(*config); + global_context->setDashboardsConfig(config); + if (global_context->isServerCompletelyStarted()) { /// It does not make sense to reload anything before server has started.
diff --git a/programs/server/config.xml b/programs/server/config.xml index 94825a55f67..5dedd78ff2a 100644 --- a/programs/server/config.xml +++ b/programs/server/config.xml @@ -1312,6 +1312,31 @@ event_date + INTERVAL 30 DAY + + + + - - create table views_max_insert_threads_null (a UInt64) Engine = Null - create materialized view views_max_insert_threads_mv Engine = Null AS select now() as ts, max(a) from views_max_insert_threads_null group by ts - - insert into views_max_insert_threads_null select * from numbers_mt(3000000000) settings max_threads = 16, max_insert_threads=16 - - drop table if exists views_max_insert_threads_null - drop table if exists views_max_insert_threads_mv - - + + + + + + + + + + From b2d8eaf1e6d67ab76f3e86cd4fd857e9535a9d20 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 29 Jul 2024 18:55:08 +0200 Subject: [PATCH 403/661] Debug TimerDescriptor --- src/Common/TimerDescriptor.cpp | 26 ++++++++++++++++++++++++-- src/IO/MMappedFileDescriptor.cpp | 2 -- 2 files changed, 24 insertions(+), 4 deletions(-) diff --git a/src/Common/TimerDescriptor.cpp b/src/Common/TimerDescriptor.cpp index b36ea4059cb..9a171ae9487 100644 --- a/src/Common/TimerDescriptor.cpp +++ b/src/Common/TimerDescriptor.cpp @@ -2,9 +2,11 @@ #include #include +#include #include #include +#include namespace DB @@ -89,9 +91,29 @@ void TimerDescriptor::drain() const /// A signal happened, need to retry. if (errno == EINTR) - continue; + { + /** This is to help with debugging. + * + * Sometimes reading from timer_fd blocks, which should not happen, because we opened it in a non-blocking mode. + * But it could be possible if a rogue 3rd-party library closed our file descriptor by mistake + * (for example by double closing due to the lack of exception safety or if it is a crappy code in plain C) + * and then another file descriptor is opened in its place. + * + * Let's try to get a name of this file descriptor and log it. + */ + LoggerPtr log = getLogger("TimerDescriptor"); - throw ErrnoException(ErrorCodes::CANNOT_READ_FROM_SOCKET, "Cannot drain timer_fd"); + static constexpr ssize_t max_link_path_length = 256; + char link_path[max_link_path_length]; + ssize_t link_path_length = readlink(fmt::format("/proc/self/fd/{}", timer_fd).c_str(), link_path, max_link_path_length); + if (-1 == link_path_length) + throw ErrnoException(ErrorCodes::CANNOT_READ_FROM_SOCKET, "Cannot readlink for a timer_fd {}", timer_fd); + + LOG_TRACE(log, "Received EINTR while trying to drain a TimerDescriptor, fd {}: {}", timer_fd, std::string_view(link_path, link_path_length)); + continue; + } + + throw ErrnoException(ErrorCodes::CANNOT_READ_FROM_SOCKET, "Cannot drain timer_fd {}", timer_fd); } chassert(res == sizeof(buf)); diff --git a/src/IO/MMappedFileDescriptor.cpp b/src/IO/MMappedFileDescriptor.cpp index a7eb8e4ede5..47f80005c9d 100644 --- a/src/IO/MMappedFileDescriptor.cpp +++ b/src/IO/MMappedFileDescriptor.cpp @@ -3,8 +3,6 @@ #include #include -#include - #include #include #include From 45db7c85cf25f9b4b27cedd7464a786c53580d3f Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 29 Jul 2024 16:57:15 +0000 Subject: [PATCH 404/661] Remove has_single_port property from plan stream. 
--- src/Processors/QueryPlan/AggregatingStep.cpp | 2 -- src/Processors/QueryPlan/FillingStep.cpp | 8 +++----- src/Processors/QueryPlan/IQueryPlanStep.h | 6 +----- src/Processors/QueryPlan/ITransformingStep.cpp | 3 --- src/Processors/QueryPlan/ReadNothingStep.cpp | 2 +- 5 files changed, 5 insertions(+), 16 deletions(-) diff --git a/src/Processors/QueryPlan/AggregatingStep.cpp b/src/Processors/QueryPlan/AggregatingStep.cpp index f31de80b22d..8a5ed7fde65 100644 --- a/src/Processors/QueryPlan/AggregatingStep.cpp +++ b/src/Processors/QueryPlan/AggregatingStep.cpp @@ -134,7 +134,6 @@ AggregatingStep::AggregatingStep( { output_stream->sort_description = group_by_sort_description; output_stream->sort_scope = DataStream::SortScope::Global; - output_stream->has_single_port = true; } } @@ -147,7 +146,6 @@ void AggregatingStep::applyOrder(SortDescription sort_description_for_merging_, { output_stream->sort_description = group_by_sort_description; output_stream->sort_scope = DataStream::SortScope::Global; - output_stream->has_single_port = true; } explicit_sorting_required_for_aggregation_in_order = false; diff --git a/src/Processors/QueryPlan/FillingStep.cpp b/src/Processors/QueryPlan/FillingStep.cpp index 65c9cf11661..81622389ada 100644 --- a/src/Processors/QueryPlan/FillingStep.cpp +++ b/src/Processors/QueryPlan/FillingStep.cpp @@ -39,12 +39,13 @@ FillingStep::FillingStep( , interpolate_description(interpolate_description_) , use_with_fill_by_sorting_prefix(use_with_fill_by_sorting_prefix_) { - if (!input_stream_.has_single_port) - throw Exception(ErrorCodes::LOGICAL_ERROR, "FillingStep expects single input"); } void FillingStep::transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) { + if (pipeline.getNumStreams() != 1) + throw Exception(ErrorCodes::LOGICAL_ERROR, "FillingStep expects single input"); + pipeline.addSimpleTransform([&](const Block & header, QueryPipelineBuilder::StreamType stream_type) -> ProcessorPtr { if (stream_type == QueryPipelineBuilder::StreamType::Totals) @@ -69,9 +70,6 @@ void FillingStep::describeActions(JSONBuilder::JSONMap & map) const void FillingStep::updateOutputStream() { - if (!input_streams.front().has_single_port) - throw Exception(ErrorCodes::LOGICAL_ERROR, "FillingStep expects single input"); - output_stream = createOutputStream( input_streams.front(), FillingTransform::transformHeader(input_streams.front().header, sort_description), getDataStreamTraits()); } diff --git a/src/Processors/QueryPlan/IQueryPlanStep.h b/src/Processors/QueryPlan/IQueryPlanStep.h index daca88fcceb..44eb7ea0c59 100644 --- a/src/Processors/QueryPlan/IQueryPlanStep.h +++ b/src/Processors/QueryPlan/IQueryPlanStep.h @@ -28,9 +28,6 @@ class DataStream public: Block header; - /// QueryPipeline has single port. Totals or extremes ports are not counted. - bool has_single_port = false; - /// Sorting scope. Please keep the mutual order (more strong mode should have greater value). 
enum class SortScope : uint8_t { @@ -51,8 +48,7 @@ public: bool hasEqualPropertiesWith(const DataStream & other) const { - return has_single_port == other.has_single_port - && sort_description == other.sort_description + return sort_description == other.sort_description && (sort_description.empty() || sort_scope == other.sort_scope); } diff --git a/src/Processors/QueryPlan/ITransformingStep.cpp b/src/Processors/QueryPlan/ITransformingStep.cpp index 9ecfdb0af22..3fa9d1b8308 100644 --- a/src/Processors/QueryPlan/ITransformingStep.cpp +++ b/src/Processors/QueryPlan/ITransformingStep.cpp @@ -20,9 +20,6 @@ DataStream ITransformingStep::createOutputStream( { DataStream output_stream{.header = std::move(output_header)}; - output_stream.has_single_port = stream_traits.returns_single_stream - || (input_stream.has_single_port && stream_traits.preserves_number_of_streams); - if (stream_traits.preserves_sorting) { output_stream.sort_description = input_stream.sort_description; diff --git a/src/Processors/QueryPlan/ReadNothingStep.cpp b/src/Processors/QueryPlan/ReadNothingStep.cpp index 253f3a5b980..3037172bbd4 100644 --- a/src/Processors/QueryPlan/ReadNothingStep.cpp +++ b/src/Processors/QueryPlan/ReadNothingStep.cpp @@ -6,7 +6,7 @@ namespace DB { ReadNothingStep::ReadNothingStep(Block output_header) - : ISourceStep(DataStream{.header = std::move(output_header), .has_single_port = true}) + : ISourceStep(DataStream{.header = std::move(output_header)}) { } From 412268bf4e64b5c0df3980e5b8ccd2b078cf2177 Mon Sep 17 00:00:00 2001 From: avogar Date: Mon, 29 Jul 2024 17:05:46 +0000 Subject: [PATCH 405/661] Update reference --- ...dynamic_read_subcolumns_small.reference.j2 | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/tests/queries/0_stateless/03036_dynamic_read_subcolumns_small.reference.j2 b/tests/queries/0_stateless/03036_dynamic_read_subcolumns_small.reference.j2 index 3d814e1205a..be3f4e53990 100644 --- a/tests/queries/0_stateless/03036_dynamic_read_subcolumns_small.reference.j2 +++ b/tests/queries/0_stateless/03036_dynamic_read_subcolumns_small.reference.j2 @@ -25,26 +25,26 @@ UInt64 7 7 \N [] 8 8 \N [] 9 9 \N [] -[[0]] \N \N [] str_10 \N str_10 [] -[[0,1]] \N \N [] +[[0]] \N \N [] str_11 \N str_11 [] -[[0,1,2]] \N \N [] +[[0,1]] \N \N [] str_12 \N str_12 [] -[[0,1,2,3]] \N \N [] +[[0,1,2]] \N \N [] str_13 \N str_13 [] -[[0,1,2,3,4]] \N \N [] +[[0,1,2,3]] \N \N [] str_14 \N str_14 [] -[[0,1,2,3,4,5]] \N \N [] +[[0,1,2,3,4]] \N \N [] str_15 \N str_15 [] -[[0,1,2,3,4,5,6]] \N \N [] +[[0,1,2,3,4,5]] \N \N [] str_16 \N str_16 [] -[[0,1,2,3,4,5,6,7]] \N \N [] +[[0,1,2,3,4,5,6]] \N \N [] str_17 \N str_17 [] -[[0,1,2,3,4,5,6,7,8]] \N \N [] +[[0,1,2,3,4,5,6,7]] \N \N [] str_18 \N str_18 [] -[[0,1,2,3,4,5,6,7,8,9]] \N \N [] +[[0,1,2,3,4,5,6,7,8]] \N \N [] str_19 \N str_19 [] +[[0,1,2,3,4,5,6,7,8,9]] \N \N [] [20] \N \N [20] ['str_21','str_21'] \N \N ['str_21','str_21'] [22,22,22] \N \N [22,22,22] From 812a2b929938c293441f6a893adf96a00d469351 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Mon, 29 Jul 2024 17:24:28 +0000 Subject: [PATCH 406/661] formatDateTime[InJodaSyntax]: make format string optional --- .../functions/type-conversion-functions.md | 12 ++++---- src/Functions/parseDateTime.cpp | 30 ++++++++++++------- .../02668_parse_datetime.reference | 6 +++- .../0_stateless/02668_parse_datetime.sql | 7 ++++- ...68_parse_datetime_in_joda_syntax.reference | 6 +++- .../02668_parse_datetime_in_joda_syntax.sql | 7 ++++- 6 files changed, 48 insertions(+), 20 deletions(-) 
diff --git a/docs/en/sql-reference/functions/type-conversion-functions.md b/docs/en/sql-reference/functions/type-conversion-functions.md index 7cc2c022143..dc90697bd20 100644 --- a/docs/en/sql-reference/functions/type-conversion-functions.md +++ b/docs/en/sql-reference/functions/type-conversion-functions.md @@ -3467,13 +3467,13 @@ This function is the opposite operation of function [formatDateTime](../function **Syntax** ``` sql -parseDateTime(str, format[, timezone]) +parseDateTime(str[, format[, timezone]]) ``` **Arguments** -- `str` — the String to be parsed -- `format` — the format string +- `str` — The String to be parsed +- `format` — The format string. Optional. `%Y-%m-%d %H:%i:%s` if not specified. - `timezone` — [Timezone](/docs/en/operations/server-configuration-parameters/settings.md/#server_configuration_parameters-timezone). Optional. **Returned value(s)** @@ -3516,13 +3516,13 @@ This function is the opposite operation of function [formatDateTimeInJodaSyntax] **Syntax** ``` sql -parseDateTimeInJodaSyntax(str, format[, timezone]) +parseDateTimeInJodaSyntax(str[, format[, timezone]]) ``` **Arguments** -- `str` — the String to be parsed -- `format` — the format string +- `str` — The String to be parsed +- `format` — The format string. Optional. `yyyy-MM-dd HH:mm:ss` if not specified. - `timezone` — [Timezone](/docs/en/operations/server-configuration-parameters/settings.md/#server_configuration_parameters-timezone). Optional. **Returned value(s)** diff --git a/src/Functions/parseDateTime.cpp b/src/Functions/parseDateTime.cpp index bdca0151bba..7ca10677be7 100644 --- a/src/Functions/parseDateTime.cpp +++ b/src/Functions/parseDateTime.cpp @@ -582,11 +582,11 @@ namespace DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { FunctionArgumentDescriptors mandatory_args{ - {"time", static_cast(&isString), nullptr, "String"}, - {"format", static_cast(&isString), nullptr, "String"} + {"time", static_cast(&isString), nullptr, "String"} }; FunctionArgumentDescriptors optional_args{ + {"format", static_cast(&isString), nullptr, "String"}, {"timezone", static_cast(&isString), &isColumnConst, "const String"} }; @@ -2029,14 +2029,24 @@ namespace String getFormat(const ColumnsWithTypeAndName & arguments) const { - const auto * format_column = checkAndGetColumnConst(arguments[1].column.get()); - if (!format_column) - throw Exception( - ErrorCodes::ILLEGAL_COLUMN, - "Illegal column {} of second ('format') argument of function {}. Must be constant string.", - arguments[1].column->getName(), - getName()); - return format_column->getValue(); + if (arguments.size() == 1) + { + if constexpr (parse_syntax == ParseSyntax::MySQL) + return "%Y-%m-%d %H:%i:%s"; + else + return "yyyy-MM-dd HH:mm:ss"; + } + else + { + const auto * col_format = checkAndGetColumnConst(arguments[1].column.get()); + if (!col_format) + throw Exception( + ErrorCodes::ILLEGAL_COLUMN, + "Illegal column {} of second ('format') argument of function {}. 
Must be constant string.", + arguments[1].column->getName(), + getName()); + return col_format->getValue(); + } } const DateLUTImpl & getTimeZone(const ColumnsWithTypeAndName & arguments) const diff --git a/tests/queries/0_stateless/02668_parse_datetime.reference b/tests/queries/0_stateless/02668_parse_datetime.reference index d21a51ce70c..b67ca2d8b76 100644 --- a/tests/queries/0_stateless/02668_parse_datetime.reference +++ b/tests/queries/0_stateless/02668_parse_datetime.reference @@ -239,7 +239,7 @@ select sTr_To_DaTe('10:04:11 03-07-2019', '%s:%i:%H %d-%m-%Y', 'UTC') = toDateTi select str_to_date('10:04:11 invalid 03-07-2019', '%s:%i:%H %d-%m-%Y', 'UTC') IS NULL; 1 -- Error handling -select parseDateTime('12 AM'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +select parseDateTime(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } select parseDateTime('12 AM', '%h %p', 'UTC', 'a fourth argument'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } -- Fuzzer crash bug #53715 select parseDateTime('', '', toString(number)) from numbers(13); -- { serverError ILLEGAL_COLUMN } @@ -270,3 +270,7 @@ select parseDateTime('8 13, 2022, 7:58:32', '%c %e, %G, %k:%i:%s', 'UTC'); 2022-08-13 07:58:32 select parseDateTime('08 13, 2022, 07:58:32', '%c %e, %G, %k:%i:%s', 'UTC'); 2022-08-13 07:58:32 +-- The format string argument is optional +set session_timezone = 'UTC'; -- don't randomize the session timezone +select parseDateTime('2021-01-04 23:12:34') = toDateTime('2021-01-04 23:12:34'); +1 diff --git a/tests/queries/0_stateless/02668_parse_datetime.sql b/tests/queries/0_stateless/02668_parse_datetime.sql index 02ac0c5f35c..7b3aed60a4a 100644 --- a/tests/queries/0_stateless/02668_parse_datetime.sql +++ b/tests/queries/0_stateless/02668_parse_datetime.sql @@ -162,7 +162,7 @@ select sTr_To_DaTe('10:04:11 03-07-2019', '%s:%i:%H %d-%m-%Y', 'UTC') = toDateTi select str_to_date('10:04:11 invalid 03-07-2019', '%s:%i:%H %d-%m-%Y', 'UTC') IS NULL; -- Error handling -select parseDateTime('12 AM'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +select parseDateTime(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } select parseDateTime('12 AM', '%h %p', 'UTC', 'a fourth argument'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } -- Fuzzer crash bug #53715 @@ -187,4 +187,9 @@ select parseDateTime('08 13, 2022, 07:58:32', '%m %e, %G, %k:%i:%s', 'UTC'); select parseDateTime('8 13, 2022, 7:58:32', '%c %e, %G, %k:%i:%s', 'UTC'); select parseDateTime('08 13, 2022, 07:58:32', '%c %e, %G, %k:%i:%s', 'UTC'); +-- The format string argument is optional +set session_timezone = 'UTC'; -- don't randomize the session timezone +select parseDateTime('2021-01-04 23:12:34') = toDateTime('2021-01-04 23:12:34'); + + -- { echoOff } diff --git a/tests/queries/0_stateless/02668_parse_datetime_in_joda_syntax.reference b/tests/queries/0_stateless/02668_parse_datetime_in_joda_syntax.reference index 9fbf105dc41..6f560577ab5 100644 --- a/tests/queries/0_stateless/02668_parse_datetime_in_joda_syntax.reference +++ b/tests/queries/0_stateless/02668_parse_datetime_in_joda_syntax.reference @@ -354,5 +354,9 @@ select parseDateTimeInJodaSyntaxOrNull('2001 366 2000', 'yyyy D yyyy', 'UTC') = select parseDateTimeInJodaSyntaxOrNull('2001 invalid 366 2000', 'yyyy D yyyy', 'UTC') IS NULL; 1 -- Error handling -select parseDateTimeInJodaSyntax('12 AM'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +select parseDateTimeInJodaSyntax(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } select parseDateTimeInJodaSyntax('12 AM', 
'h a', 'UTC', 'a fourth argument'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +-- The format string argument is optional +set session_timezone = 'UTC'; -- don't randomize the session timezone +select parseDateTimeInJodaSyntax('2021-01-04 23:12:34') = toDateTime('2021-01-04 23:12:34'); +1 diff --git a/tests/queries/0_stateless/02668_parse_datetime_in_joda_syntax.sql b/tests/queries/0_stateless/02668_parse_datetime_in_joda_syntax.sql index f5810d3d4c3..28d14607ba6 100644 --- a/tests/queries/0_stateless/02668_parse_datetime_in_joda_syntax.sql +++ b/tests/queries/0_stateless/02668_parse_datetime_in_joda_syntax.sql @@ -239,6 +239,11 @@ select parseDateTimeInJodaSyntaxOrNull('2001 366 2000', 'yyyy D yyyy', 'UTC') = select parseDateTimeInJodaSyntaxOrNull('2001 invalid 366 2000', 'yyyy D yyyy', 'UTC') IS NULL; -- Error handling -select parseDateTimeInJodaSyntax('12 AM'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +select parseDateTimeInJodaSyntax(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } select parseDateTimeInJodaSyntax('12 AM', 'h a', 'UTC', 'a fourth argument'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } + +-- The format string argument is optional +set session_timezone = 'UTC'; -- don't randomize the session timezone +select parseDateTimeInJodaSyntax('2021-01-04 23:12:34') = toDateTime('2021-01-04 23:12:34'); + -- { echoOff } From 1c9d60ca972eab618b82704e14f1a680daed9c04 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Mon, 15 Jul 2024 16:36:45 +0000 Subject: [PATCH 407/661] Refactoring --- programs/client/Client.h | 6 +- programs/local/LocalServer.h | 4 +- src/Client/ClientApplicationBase.cpp | 539 +++++++++++++++++++++++++++ src/Client/ClientApplicationBase.h | 54 +++ src/Client/ClientBase.cpp | 362 +----------------- src/Client/ClientBase.h | 73 +++- 6 files changed, 659 insertions(+), 379 deletions(-) create mode 100644 src/Client/ClientApplicationBase.cpp create mode 100644 src/Client/ClientApplicationBase.h diff --git a/programs/client/Client.h b/programs/client/Client.h index 9571440d6ba..7fdf77031ab 100644 --- a/programs/client/Client.h +++ b/programs/client/Client.h @@ -1,14 +1,16 @@ #pragma once -#include +#include namespace DB { -class Client : public ClientBase +class Client : public ClientApplicationBase { public: + using Arguments = ClientApplicationBase::Arguments; + Client() = default; void initialize(Poco::Util::Application & self) override; diff --git a/programs/local/LocalServer.h b/programs/local/LocalServer.h index ae9980311e1..b18a7a90961 100644 --- a/programs/local/LocalServer.h +++ b/programs/local/LocalServer.h @@ -1,6 +1,6 @@ #pragma once -#include +#include #include #include @@ -21,7 +21,7 @@ namespace DB /// Lightweight Application for clickhouse-local /// No networking, no extra configs and working directories, no pid and status files, no dictionaries, no logging. 
/// Quiet mode by default -class LocalServer : public ClientBase, public Loggers +class LocalServer : public ClientApplicationBase, public Loggers { public: LocalServer() = default; diff --git a/src/Client/ClientApplicationBase.cpp b/src/Client/ClientApplicationBase.cpp new file mode 100644 index 00000000000..59c98983694 --- /dev/null +++ b/src/Client/ClientApplicationBase.cpp @@ -0,0 +1,539 @@ +#include + +#include +#include +#include +#include +#include +#include + +#include +#include "config.h" + +#include +#include +#include +#include +#include + +using namespace std::literals; + +namespace CurrentMetrics +{ + extern const Metric MemoryTracking; +} + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; + extern const int CANNOT_SET_SIGNAL_HANDLER; + extern const int UNRECOGNIZED_ARGUMENTS; +} + +static ClientInfo::QueryKind parseQueryKind(const String & query_kind) +{ + if (query_kind == "initial_query") + return ClientInfo::QueryKind::INITIAL_QUERY; + if (query_kind == "secondary_query") + return ClientInfo::QueryKind::SECONDARY_QUERY; + if (query_kind == "no_query") + return ClientInfo::QueryKind::NO_QUERY; + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown query kind {}", query_kind); +} + +/// This signal handler is set only for SIGINT and SIGQUIT. +void interruptSignalHandler(int signum) +{ + if (ClientApplicationBase::getInstance().tryStopQuery()) + safeExit(128 + signum); +} + +ClientApplicationBase::~ClientApplicationBase() = default; +ClientApplicationBase::ClientApplicationBase() : ClientBase(STDIN_FILENO, STDOUT_FILENO, STDERR_FILENO, std::cin, std::cout, std::cerr) {} + +ClientApplicationBase & ClientApplicationBase::getInstance() +{ + return dynamic_cast(Poco::Util::Application::instance()); +} + +void ClientApplicationBase::setupSignalHandler() +{ + ClientApplicationBase::getInstance().stopQuery(); + + struct sigaction new_act; + memset(&new_act, 0, sizeof(new_act)); + + new_act.sa_handler = interruptSignalHandler; + new_act.sa_flags = 0; + +#if defined(OS_DARWIN) + sigemptyset(&new_act.sa_mask); +#else + if (sigemptyset(&new_act.sa_mask)) + throw ErrnoException(ErrorCodes::CANNOT_SET_SIGNAL_HANDLER, "Cannot set signal handler"); +#endif + + if (sigaction(SIGINT, &new_act, nullptr)) + throw ErrnoException(ErrorCodes::CANNOT_SET_SIGNAL_HANDLER, "Cannot set signal handler"); + + if (sigaction(SIGQUIT, &new_act, nullptr)) + throw ErrnoException(ErrorCodes::CANNOT_SET_SIGNAL_HANDLER, "Cannot set signal handler"); +} + + +namespace +{ + +/// Define transparent hash to we can use +/// std::string_view with the containers +struct TransparentStringHash +{ + using is_transparent = void; + size_t operator()(std::string_view txt) const + { + return std::hash{}(txt); + } +}; + +/* + * This functor is used to parse command line arguments and replace dashes with underscores, + * allowing options to be specified using either dashes or underscores. 
+ */ +class OptionsAliasParser +{ +public: + explicit OptionsAliasParser(const boost::program_options::options_description& options) + { + options_names.reserve(options.options().size()); + for (const auto& option : options.options()) + options_names.insert(option->long_name()); + } + + /* + * Parses arguments by replacing dashes with underscores, and matches the resulting name with known options + * Implements boost::program_options::ext_parser logic + */ + std::pair operator()(const std::string & token) const + { + if (!token.starts_with("--")) + return {}; + std::string arg = token.substr(2); + + // divide token by '=' to separate key and value if options style=long_allow_adjacent + auto pos_eq = arg.find('='); + std::string key = arg.substr(0, pos_eq); + + if (options_names.contains(key)) + // option does not require any changes, because it is already correct + return {}; + + std::replace(key.begin(), key.end(), '-', '_'); + if (!options_names.contains(key)) + // after replacing '-' with '_' argument is still unknown + return {}; + + std::string value; + if (pos_eq != std::string::npos && pos_eq < arg.size()) + value = arg.substr(pos_eq + 1); + + return {key, value}; + } + +private: + std::unordered_set options_names; +}; + +} + +/// Enable optimizations even in debug builds because otherwise options parsing becomes extremely slow affecting .sh tests +#if defined(__clang__) +#pragma clang optimize on +#endif +void ClientApplicationBase::parseAndCheckOptions(OptionsDescription & options_description, po::variables_map & options, Arguments & arguments) +{ + if (allow_repeated_settings) + addProgramOptionsAsMultitokens(cmd_settings, options_description.main_description.value()); + else + addProgramOptions(cmd_settings, options_description.main_description.value()); + + if (allow_merge_tree_settings) + { + /// Add merge tree settings manually, because names of some settings + /// may clash. Query settings have higher priority and we just + /// skip ambiguous merge tree settings. + auto & main_options = options_description.main_description.value(); + + std::unordered_set> main_option_names; + for (const auto & option : main_options.options()) + main_option_names.insert(option->long_name()); + + for (const auto & setting : cmd_merge_tree_settings.all()) + { + const auto add_setting = [&](const std::string_view name) + { + if (auto it = main_option_names.find(name); it != main_option_names.end()) + return; + + if (allow_repeated_settings) + addProgramOptionAsMultitoken(cmd_merge_tree_settings, main_options, name, setting); + else + addProgramOption(cmd_merge_tree_settings, main_options, name, setting); + }; + + const auto & setting_name = setting.getName(); + + add_setting(setting_name); + + const auto & settings_to_aliases = MergeTreeSettings::Traits::settingsToAliases(); + if (auto it = settings_to_aliases.find(setting_name); it != settings_to_aliases.end()) + { + for (const auto alias : it->second) + { + add_setting(alias); + } + } + } + } + + /// Parse main commandline options. + auto parser = po::command_line_parser(arguments) + .options(options_description.main_description.value()) + .extra_parser(OptionsAliasParser(options_description.main_description.value())) + .allow_unregistered(); + po::parsed_options parsed = parser.run(); + + /// Check unrecognized options without positional options. 
+ auto unrecognized_options = po::collect_unrecognized(parsed.options, po::collect_unrecognized_mode::exclude_positional); + if (!unrecognized_options.empty()) + { + auto hints = this->getHints(unrecognized_options[0]); + if (!hints.empty()) + throw Exception(ErrorCodes::UNRECOGNIZED_ARGUMENTS, "Unrecognized option '{}'. Maybe you meant {}", + unrecognized_options[0], toString(hints)); + + throw Exception(ErrorCodes::UNRECOGNIZED_ARGUMENTS, "Unrecognized option '{}'", unrecognized_options[0]); + } + + /// Check positional options. + for (const auto & op : parsed.options) + { + if (!op.unregistered && op.string_key.empty() && !op.original_tokens[0].starts_with("--") + && !op.original_tokens[0].empty() && !op.value.empty()) + { + /// Two special cases for better usability: + /// - if the option contains a whitespace, it might be a query: clickhouse "SELECT 1" + /// These are relevant for interactive usage - user-friendly, but questionable in general. + /// In case of ambiguity or for scripts, prefer using proper options. + + const auto & token = op.original_tokens[0]; + po::variable_value value(boost::any(op.value), false); + + const char * option; + if (token.contains(' ')) + option = "query"; + else + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Positional option `{}` is not supported.", token); + + if (!options.emplace(option, value).second) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Positional option `{}` is not supported.", token); + } + } + + po::store(parsed, options); +} + +void ClientApplicationBase::addMultiquery(std::string_view query, Arguments & common_arguments) const +{ + common_arguments.emplace_back("--multiquery"); + common_arguments.emplace_back("-q"); + common_arguments.emplace_back(query); +} + +Poco::Util::LayeredConfiguration & ClientApplicationBase::getClientConfiguration() +{ + return config(); +} + +void ClientApplicationBase::init(int argc, char ** argv) +{ + namespace po = boost::program_options; + + /// Don't parse options with Poco library, we prefer neat boost::program_options. + stopOptionsProcessing(); + + stdin_is_a_tty = isatty(STDIN_FILENO); + stdout_is_a_tty = isatty(STDOUT_FILENO); + stderr_is_a_tty = isatty(STDERR_FILENO); + terminal_width = getTerminalWidth(); + + std::vector external_tables_arguments; + Arguments common_arguments = {""}; /// 0th argument is ignored. + std::vector hosts_and_ports_arguments; + + if (argc) + argv0 = argv[0]; + readArguments(argc, argv, common_arguments, external_tables_arguments, hosts_and_ports_arguments); + + /// Support for Unicode dashes + /// Interpret Unicode dashes as default double-hyphen + for (auto & arg : common_arguments) + { + // replace em-dash(U+2014) + boost::replace_all(arg, "—", "--"); + // replace en-dash(U+2013) + boost::replace_all(arg, "–", "--"); + // replace mathematical minus(U+2212) + boost::replace_all(arg, "−", "--"); + } + + + OptionsDescription options_description; + options_description.main_description.emplace(createOptionsDescription("Main options", terminal_width)); + + /// Common options for clickhouse-client and clickhouse-local. + options_description.main_description->add_options() + ("help", "print usage summary, combine with --verbose to display all options") + ("verbose", "print query and other debugging info") + ("version,V", "print version information and exit") + ("version-clean", "print version in machine-readable format and exit") + + ("config-file,C", po::value(), "config-file path") + + ("query,q", po::value>()->multitoken(), R"(Query. 
Can be specified multiple times (--query "SELECT 1" --query "SELECT 2") or once with multiple comma-separated queries (--query "SELECT 1; SELECT 2;"). In the latter case, INSERT queries with non-VALUE format must be separated by empty lines.)") + ("queries-file", po::value>()->multitoken(), "file path with queries to execute; multiple files can be specified (--queries-file file1 file2...)") + ("multiquery,n", "Obsolete, does nothing") + ("multiline,m", "If specified, allow multiline queries (do not send the query on Enter)") + ("database,d", po::value(), "database") + ("query_kind", po::value()->default_value("initial_query"), "One of initial_query/secondary_query/no_query") + ("query_id", po::value(), "query_id") + + ("history_file", po::value(), "path to history file") + + ("stage", po::value()->default_value("complete"), "Request query processing up to specified stage: complete,fetch_columns,with_mergeable_state,with_mergeable_state_after_aggregation,with_mergeable_state_after_aggregation_and_limit") + ("progress", po::value()->implicit_value(ProgressOption::TTY, "tty")->default_value(ProgressOption::DEFAULT, "default"), "Print progress of queries execution - to TTY: tty|on|1|true|yes; to STDERR non-interactive mode: err; OFF: off|0|false|no; DEFAULT - interactive to TTY, non-interactive is off") + + ("disable_suggestion,A", "Disable loading suggestion data. Note that suggestion data is loaded asynchronously through a second connection to ClickHouse server. Also it is reasonable to disable suggestion if you want to paste a query with TAB characters. Shorthand option -A is for those who get used to mysql client.") + ("wait_for_suggestions_to_load", "Load suggestion data synchonously.") + ("time,t", "print query execution time to stderr in non-interactive mode (for benchmarks)") + ("memory-usage", po::value()->implicit_value("default")->default_value("none"), "print memory usage to stderr in non-interactive mode (for benchmarks). 
Values: 'none', 'default', 'readable'") + + ("echo", "in batch mode, print query before execution") + + ("log-level", po::value(), "log level") + ("server_logs_file", po::value(), "put server logs into specified file") + + ("suggestion_limit", po::value()->default_value(10000), "Suggestion limit for how many databases, tables and columns to fetch.") + + ("format,f", po::value(), "default output format (and input format for clickhouse-local)") + ("output-format", po::value(), "default output format (this option has preference over --format)") + + ("vertical,E", "vertical output format, same as --format=Vertical or FORMAT Vertical or \\G at end of command") + ("highlight", po::value()->default_value(true), "enable or disable basic syntax highlight in interactive command line") + + ("ignore-error", "do not stop processing when an error occurs") + ("stacktrace", "print stack traces of exceptions") + ("hardware-utilization", "print hardware utilization information in progress bar") + ("print-profile-events", po::value(&profile_events.print)->zero_tokens(), "Printing ProfileEvents packets") + ("profile-events-delay-ms", po::value()->default_value(profile_events.delay_ms), "Delay between printing `ProfileEvents` packets (-1 - print only totals, 0 - print every single packet)") + ("processed-rows", "print the number of locally processed rows") + + ("interactive", "Process queries-file or --query query and start interactive mode") + ("pager", po::value(), "Pipe all output into this command (less or similar)") + ("max_memory_usage_in_client", po::value(), "Set memory limit in client/local server") + + ("fuzzer-args", po::value(), "Command line arguments for the LLVM's libFuzzer driver. Only relevant if the application is compiled with libFuzzer.") + + ("client_logs_file", po::value(), "Path to a file for writing client logs. Currently we only have fatal logs (when the client crashes)") + ; + + addOptions(options_description); + + OptionsDescription options_description_non_verbose = options_description; + + auto getter = [](const auto & op) + { + String op_long_name = op->long_name(); + return "--" + String(op_long_name); + }; + + if (options_description.main_description) + { + const auto & main_options = options_description.main_description->options(); + std::transform(main_options.begin(), main_options.end(), std::back_inserter(cmd_options), getter); + } + + if (options_description.external_description) + { + const auto & external_options = options_description.external_description->options(); + std::transform(external_options.begin(), external_options.end(), std::back_inserter(cmd_options), getter); + } + + po::variables_map options; + parseAndCheckOptions(options_description, options, common_arguments); + po::notify(options); + + if (options.count("version") || options.count("V")) + { + showClientVersion(); + exit(0); // NOLINT(concurrency-mt-unsafe) + } + + if (options.count("version-clean")) + { + output_stream << VERSION_STRING; + exit(0); // NOLINT(concurrency-mt-unsafe) + } + + if (options.count("verbose")) + getClientConfiguration().setBool("verbose", true); + + /// Output of help message. + if (options.count("help") + || (options.count("host") && options["host"].as() == "elp")) /// If user writes -help instead of --help. 
+ { + if (getClientConfiguration().getBool("verbose", false)) + printHelpMessage(options_description, true); + else + printHelpMessage(options_description_non_verbose, false); + exit(0); // NOLINT(concurrency-mt-unsafe) + } + + /// Common options for clickhouse-client and clickhouse-local. + + /// Output execution time to stderr in batch mode. + if (options.count("time")) + getClientConfiguration().setBool("print-time-to-stderr", true); + if (options.count("memory-usage")) + { + const auto & memory_usage_mode = options["memory-usage"].as(); + if (memory_usage_mode != "none" && memory_usage_mode != "default" && memory_usage_mode != "readable") + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown memory-usage mode: {}", memory_usage_mode); + getClientConfiguration().setString("print-memory-to-stderr", memory_usage_mode); + } + + if (options.count("query")) + queries = options["query"].as>(); + if (options.count("query_id")) + getClientConfiguration().setString("query_id", options["query_id"].as()); + if (options.count("database")) + getClientConfiguration().setString("database", options["database"].as()); + if (options.count("config-file")) + getClientConfiguration().setString("config-file", options["config-file"].as()); + if (options.count("queries-file")) + queries_files = options["queries-file"].as>(); + if (options.count("multiline")) + getClientConfiguration().setBool("multiline", true); + if (options.count("ignore-error")) + getClientConfiguration().setBool("ignore-error", true); + if (options.count("format")) + getClientConfiguration().setString("format", options["format"].as()); + if (options.count("output-format")) + getClientConfiguration().setString("output-format", options["output-format"].as()); + if (options.count("vertical")) + getClientConfiguration().setBool("vertical", true); + if (options.count("stacktrace")) + getClientConfiguration().setBool("stacktrace", true); + if (options.count("print-profile-events")) + getClientConfiguration().setBool("print-profile-events", true); + if (options.count("profile-events-delay-ms")) + getClientConfiguration().setUInt64("profile-events-delay-ms", options["profile-events-delay-ms"].as()); + /// Whether to print the number of processed rows at + if (options.count("processed-rows")) + getClientConfiguration().setBool("print-num-processed-rows", true); + if (options.count("progress")) + { + switch (options["progress"].as()) + { + case DEFAULT: + getClientConfiguration().setString("progress", "default"); + break; + case OFF: + getClientConfiguration().setString("progress", "off"); + break; + case TTY: + getClientConfiguration().setString("progress", "tty"); + break; + case ERR: + getClientConfiguration().setString("progress", "err"); + break; + } + } + if (options.count("echo")) + getClientConfiguration().setBool("echo", true); + if (options.count("disable_suggestion")) + getClientConfiguration().setBool("disable_suggestion", true); + if (options.count("wait_for_suggestions_to_load")) + getClientConfiguration().setBool("wait_for_suggestions_to_load", true); + if (options.count("suggestion_limit")) + getClientConfiguration().setInt("suggestion_limit", options["suggestion_limit"].as()); + if (options.count("highlight")) + getClientConfiguration().setBool("highlight", options["highlight"].as()); + if (options.count("history_file")) + getClientConfiguration().setString("history_file", options["history_file"].as()); + if (options.count("interactive")) + getClientConfiguration().setBool("interactive", true); + if (options.count("pager")) + 
getClientConfiguration().setString("pager", options["pager"].as()); + + if (options.count("log-level")) + Poco::Logger::root().setLevel(options["log-level"].as()); + if (options.count("server_logs_file")) + server_logs_file = options["server_logs_file"].as(); + + query_processing_stage = QueryProcessingStage::fromString(options["stage"].as()); + query_kind = parseQueryKind(options["query_kind"].as()); + profile_events.print = options.count("print-profile-events"); + profile_events.delay_ms = options["profile-events-delay-ms"].as(); + + processOptions(options_description, options, external_tables_arguments, hosts_and_ports_arguments); + { + std::unordered_set alias_names; + alias_names.reserve(options_description.main_description->options().size()); + for (const auto& option : options_description.main_description->options()) + alias_names.insert(option->long_name()); + argsToConfig(common_arguments, getClientConfiguration(), 100, &alias_names); + } + + clearPasswordFromCommandLine(argc, argv); + + /// Limit on total memory usage + std::string max_client_memory_usage = getClientConfiguration().getString("max_memory_usage_in_client", "0" /*default value*/); + if (max_client_memory_usage != "0") + { + UInt64 max_client_memory_usage_int = parseWithSizeSuffix(max_client_memory_usage.c_str(), max_client_memory_usage.length()); + + total_memory_tracker.setHardLimit(max_client_memory_usage_int); + total_memory_tracker.setDescription("(total)"); + total_memory_tracker.setMetric(CurrentMetrics::MemoryTracking); + } + + /// Print stacktrace in case of crash + HandledSignals::instance().setupTerminateHandler(); + HandledSignals::instance().setupCommonDeadlySignalHandlers(); + /// We don't setup signal handlers for SIGINT, SIGQUIT, SIGTERM because we don't + /// have an option for client to shutdown gracefully. + + fatal_channel_ptr = new Poco::SplitterChannel; + fatal_console_channel_ptr = new Poco::ConsoleChannel; + fatal_channel_ptr->addChannel(fatal_console_channel_ptr); + if (options.count("client_logs_file")) + { + fatal_file_channel_ptr = new Poco::SimpleFileChannel(options["client_logs_file"].as()); + fatal_channel_ptr->addChannel(fatal_file_channel_ptr); + } + + fatal_log = createLogger("ClientBase", fatal_channel_ptr.get(), Poco::Message::PRIO_FATAL); + signal_listener = std::make_unique(nullptr, fatal_log); + signal_listener_thread.start(*signal_listener); + +#if USE_GWP_ASAN + GWPAsan::initFinished(); +#endif + +} + + +} diff --git a/src/Client/ClientApplicationBase.h b/src/Client/ClientApplicationBase.h new file mode 100644 index 00000000000..217fa29c3f4 --- /dev/null +++ b/src/Client/ClientApplicationBase.h @@ -0,0 +1,54 @@ +#pragma once + + +#include +#include +#include +#include +#include +#include + +#include + +namespace po = boost::program_options; + +namespace DB +{ + +void interruptSignalHandler(int signum); + +/** + * The base class for client appliucations such as + * clickhouse-client or clickhouse-local. + * The main purpose and responsibility of it is dealing with + * application-specific stuff such as command line arguments parsing + * and setting up signal handlers, so queries will be cancelled after + * Ctrl+C is pressed. 
+ */ +class ClientApplicationBase : public ClientBase, public Poco::Util::Application, public IHints<2> +{ +public: + using ClientBase::processOptions; + using Arguments = ClientBase::Arguments; + + static ClientApplicationBase & getInstance(); + + ClientApplicationBase(); + ~ClientApplicationBase() override; + + void init(int argc, char ** argv); + std::vector getAllRegisteredNames() const override { return cmd_options; } + +protected: + Poco::Util::LayeredConfiguration & getClientConfiguration() override; + void setupSignalHandler() override; + void addMultiquery(std::string_view query, Arguments & common_arguments) const; + +private: + void parseAndCheckOptions(OptionsDescription & options_description, po::variables_map & options, Arguments & arguments); + + std::vector cmd_options; +}; + + +} diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index a88eed25db1..9cf3b955d26 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -5,7 +5,6 @@ #include #include -#include #include #include #include @@ -17,7 +16,6 @@ #include #include #include -#include #include #include #include @@ -158,17 +156,6 @@ std::istream& operator>> (std::istream & in, ProgressOption & progress) return in; } -static ClientInfo::QueryKind parseQueryKind(const String & query_kind) -{ - if (query_kind == "initial_query") - return ClientInfo::QueryKind::INITIAL_QUERY; - if (query_kind == "secondary_query") - return ClientInfo::QueryKind::SECONDARY_QUERY; - if (query_kind == "no_query") - return ClientInfo::QueryKind::NO_QUERY; - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown query kind {}", query_kind); -} - static void incrementProfileEventsBlock(Block & dst, const Block & src) { if (!dst) @@ -269,36 +256,6 @@ static void incrementProfileEventsBlock(Block & dst, const Block & src) dst.setColumns(std::move(mutable_columns)); } - -std::atomic exit_after_signals = 0; - -class QueryInterruptHandler : private boost::noncopyable -{ -public: - /// Store how much interrupt signals can be before stopping the query - /// by default stop after the first interrupt signal. - static void start(Int32 signals_before_stop = 1) { exit_after_signals.store(signals_before_stop); } - - /// Set value not greater then 0 to mark the query as stopped. - static void stop() { exit_after_signals.store(0); } - - /// Return true if the query was stopped. - /// Query was stopped if it received at least "signals_before_stop" interrupt signals. - static bool try_stop() { return exit_after_signals.fetch_sub(1) <= 0; } - static bool cancelled() { return exit_after_signals.load() <= 0; } - - /// Return how much interrupt signals remain before stop. - static Int32 cancelled_status() { return exit_after_signals.load(); } -}; - -/// This signal handler is set for SIGINT and SIGQUIT. -void interruptSignalHandler(int signum) -{ - if (QueryInterruptHandler::try_stop()) - safeExit(128 + signum); -} - - /// To cancel the query on local format error. 
class LocalFormatError : public DB::Exception { @@ -345,31 +302,6 @@ ClientBase::ClientBase( terminal_width = getTerminalWidth(in_fd, err_fd); } -void ClientBase::setupSignalHandler() -{ - QueryInterruptHandler::stop(); - - struct sigaction new_act; - memset(&new_act, 0, sizeof(new_act)); - - new_act.sa_handler = interruptSignalHandler; - new_act.sa_flags = 0; - -#if defined(OS_DARWIN) - sigemptyset(&new_act.sa_mask); -#else - if (sigemptyset(&new_act.sa_mask)) - throw ErrnoException(ErrorCodes::CANNOT_SET_SIGNAL_HANDLER, "Cannot set signal handler"); -#endif - - if (sigaction(SIGINT, &new_act, nullptr)) - throw ErrnoException(ErrorCodes::CANNOT_SET_SIGNAL_HANDLER, "Cannot set signal handler"); - - if (sigaction(SIGQUIT, &new_act, nullptr)) - throw ErrnoException(ErrorCodes::CANNOT_SET_SIGNAL_HANDLER, "Cannot set signal handler"); -} - - ASTPtr ClientBase::parseQuery(const char *& pos, const char * end, const Settings & settings, bool allow_multi_statements) { std::unique_ptr parser; @@ -1113,8 +1045,8 @@ void ClientBase::processOrdinaryQuery(const String & query_to_execute, ASTPtr pa { try { - QueryInterruptHandler::start(signals_before_stop); - SCOPE_EXIT({ QueryInterruptHandler::stop(); }); + query_interrupt_handler.start(signals_before_stop); + SCOPE_EXIT({ query_interrupt_handler.stop(); }); connection->sendQuery( connection_parameters.timeouts, @@ -1178,13 +1110,13 @@ void ClientBase::receiveResult(ASTPtr parsed_query, Int32 signals_before_stop, b /// to avoid losing sync. if (!cancelled) { - if (partial_result_on_first_cancel && QueryInterruptHandler::cancelled_status() == signals_before_stop - 1) + if (partial_result_on_first_cancel && query_interrupt_handler.cancelled_status() == signals_before_stop - 1) { connection->sendCancel(); /// First cancel reading request was sent. Next requests will only be with a full cancel partial_result_on_first_cancel = false; } - else if (QueryInterruptHandler::cancelled()) + else if (query_interrupt_handler.cancelled()) { cancelQuery(); } @@ -1563,8 +1495,8 @@ void ClientBase::processInsertQuery(const String & query_to_execute, ASTPtr pars return; } - QueryInterruptHandler::start(); - SCOPE_EXIT({ QueryInterruptHandler::stop(); }); + query_interrupt_handler.start(); + SCOPE_EXIT({ query_interrupt_handler.stop(); }); connection->sendQuery( connection_parameters.timeouts, @@ -1775,7 +1707,7 @@ try Block block; while (executor.pull(block)) { - if (!cancelled && QueryInterruptHandler::cancelled()) + if (!cancelled && query_interrupt_handler.cancelled()) { cancelQuery(); executor.cancel(); @@ -2857,7 +2789,6 @@ void ClientBase::runLibFuzzer() void ClientBase::runLibFuzzer() {} #endif - void ClientBase::clearTerminal() { /// Clear from cursor until end of screen. @@ -2867,288 +2798,9 @@ void ClientBase::clearTerminal() output_stream << "\033[0J" "\033[?25h"; } - void ClientBase::showClientVersion() { output_stream << VERSION_NAME << " " + getName() + " version " << VERSION_STRING << VERSION_OFFICIAL << "." << std::endl; } -void ClientBase::init(int argc, char ** argv) -{ - namespace po = boost::program_options; - - /// Don't parse options with Poco library, we prefer neat boost::program_options. - stopOptionsProcessing(); - - stdin_is_a_tty = isatty(STDIN_FILENO); - stdout_is_a_tty = isatty(STDOUT_FILENO); - stderr_is_a_tty = isatty(STDERR_FILENO); - terminal_width = getTerminalWidth(); - - std::vector external_tables_arguments; - Arguments common_arguments = {""}; /// 0th argument is ignored. 
- std::vector hosts_and_ports_arguments; - - if (argc) - argv0 = argv[0]; - readArguments(argc, argv, common_arguments, external_tables_arguments, hosts_and_ports_arguments); - - /// Support for Unicode dashes - /// Interpret Unicode dashes as default double-hyphen - for (auto & arg : common_arguments) - { - // replace em-dash(U+2014) - boost::replace_all(arg, "—", "--"); - // replace en-dash(U+2013) - boost::replace_all(arg, "–", "--"); - // replace mathematical minus(U+2212) - boost::replace_all(arg, "−", "--"); - } - - - OptionsDescription options_description; - options_description.main_description.emplace(createOptionsDescription("Main options", terminal_width)); - - /// Common options for clickhouse-client and clickhouse-local. - options_description.main_description->add_options() - ("help", "print usage summary, combine with --verbose to display all options") - ("verbose", "print query and other debugging info") - ("version,V", "print version information and exit") - ("version-clean", "print version in machine-readable format and exit") - - ("config-file,C", po::value(), "config-file path") - - ("query,q", po::value>()->multitoken(), R"(Query. Can be specified multiple times (--query "SELECT 1" --query "SELECT 2") or once with multiple comma-separated queries (--query "SELECT 1; SELECT 2;"). In the latter case, INSERT queries with non-VALUE format must be separated by empty lines.)") - ("queries-file", po::value>()->multitoken(), "file path with queries to execute; multiple files can be specified (--queries-file file1 file2...)") - ("multiquery,n", "Obsolete, does nothing") - ("multiline,m", "If specified, allow multiline queries (do not send the query on Enter)") - ("database,d", po::value(), "database") - ("query_kind", po::value()->default_value("initial_query"), "One of initial_query/secondary_query/no_query") - ("query_id", po::value(), "query_id") - - ("history_file", po::value(), "path to history file") - - ("stage", po::value()->default_value("complete"), "Request query processing up to specified stage: complete,fetch_columns,with_mergeable_state,with_mergeable_state_after_aggregation,with_mergeable_state_after_aggregation_and_limit") - ("progress", po::value()->implicit_value(ProgressOption::TTY, "tty")->default_value(ProgressOption::DEFAULT, "default"), "Print progress of queries execution - to TTY: tty|on|1|true|yes; to STDERR non-interactive mode: err; OFF: off|0|false|no; DEFAULT - interactive to TTY, non-interactive is off") - - ("disable_suggestion,A", "Disable loading suggestion data. Note that suggestion data is loaded asynchronously through a second connection to ClickHouse server. Also it is reasonable to disable suggestion if you want to paste a query with TAB characters. Shorthand option -A is for those who get used to mysql client.") - ("wait_for_suggestions_to_load", "Load suggestion data synchonously.") - ("time,t", "print query execution time to stderr in non-interactive mode (for benchmarks)") - ("memory-usage", po::value()->implicit_value("default")->default_value("none"), "print memory usage to stderr in non-interactive mode (for benchmarks). 
Values: 'none', 'default', 'readable'") - - ("echo", "in batch mode, print query before execution") - - ("log-level", po::value(), "log level") - ("server_logs_file", po::value(), "put server logs into specified file") - - ("suggestion_limit", po::value()->default_value(10000), "Suggestion limit for how many databases, tables and columns to fetch.") - - ("format,f", po::value(), "default output format (and input format for clickhouse-local)") - ("output-format", po::value(), "default output format (this option has preference over --format)") - - ("vertical,E", "vertical output format, same as --format=Vertical or FORMAT Vertical or \\G at end of command") - ("highlight", po::value()->default_value(true), "enable or disable basic syntax highlight in interactive command line") - - ("ignore-error", "do not stop processing when an error occurs") - ("stacktrace", "print stack traces of exceptions") - ("hardware-utilization", "print hardware utilization information in progress bar") - ("print-profile-events", po::value(&profile_events.print)->zero_tokens(), "Printing ProfileEvents packets") - ("profile-events-delay-ms", po::value()->default_value(profile_events.delay_ms), "Delay between printing `ProfileEvents` packets (-1 - print only totals, 0 - print every single packet)") - ("processed-rows", "print the number of locally processed rows") - - ("interactive", "Process queries-file or --query query and start interactive mode") - ("pager", po::value(), "Pipe all output into this command (less or similar)") - ("max_memory_usage_in_client", po::value(), "Set memory limit in client/local server") - - ("fuzzer-args", po::value(), "Command line arguments for the LLVM's libFuzzer driver. Only relevant if the application is compiled with libFuzzer.") - - ("client_logs_file", po::value(), "Path to a file for writing client logs. Currently we only have fatal logs (when the client crashes)") - ; - - addOptions(options_description); - - OptionsDescription options_description_non_verbose = options_description; - - auto getter = [](const auto & op) - { - String op_long_name = op->long_name(); - return "--" + String(op_long_name); - }; - - if (options_description.main_description) - { - const auto & main_options = options_description.main_description->options(); - std::transform(main_options.begin(), main_options.end(), std::back_inserter(cmd_options), getter); - } - - if (options_description.external_description) - { - const auto & external_options = options_description.external_description->options(); - std::transform(external_options.begin(), external_options.end(), std::back_inserter(cmd_options), getter); - } - - po::variables_map options; - parseAndCheckOptions(options_description, options, common_arguments); - po::notify(options); - - if (options.count("version") || options.count("V")) - { - showClientVersion(); - exit(0); // NOLINT(concurrency-mt-unsafe) - } - - if (options.count("version-clean")) - { - output_stream << VERSION_STRING; - exit(0); // NOLINT(concurrency-mt-unsafe) - } - - if (options.count("verbose")) - getClientConfiguration().setBool("verbose", true); - - /// Output of help message. - if (options.count("help") - || (options.count("host") && options["host"].as() == "elp")) /// If user writes -help instead of --help. 
- { - if (getClientConfiguration().getBool("verbose", false)) - printHelpMessage(options_description, true); - else - printHelpMessage(options_description_non_verbose, false); - exit(0); // NOLINT(concurrency-mt-unsafe) - } - - /// Common options for clickhouse-client and clickhouse-local. - - /// Output execution time to stderr in batch mode. - if (options.count("time")) - getClientConfiguration().setBool("print-time-to-stderr", true); - if (options.count("memory-usage")) - { - const auto & memory_usage_mode = options["memory-usage"].as(); - if (memory_usage_mode != "none" && memory_usage_mode != "default" && memory_usage_mode != "readable") - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown memory-usage mode: {}", memory_usage_mode); - getClientConfiguration().setString("print-memory-to-stderr", memory_usage_mode); - } - - if (options.count("query")) - queries = options["query"].as>(); - if (options.count("query_id")) - getClientConfiguration().setString("query_id", options["query_id"].as()); - if (options.count("database")) - getClientConfiguration().setString("database", options["database"].as()); - if (options.count("config-file")) - getClientConfiguration().setString("config-file", options["config-file"].as()); - if (options.count("queries-file")) - queries_files = options["queries-file"].as>(); - if (options.count("multiline")) - getClientConfiguration().setBool("multiline", true); - if (options.count("ignore-error")) - getClientConfiguration().setBool("ignore-error", true); - if (options.count("format")) - getClientConfiguration().setString("format", options["format"].as()); - if (options.count("output-format")) - getClientConfiguration().setString("output-format", options["output-format"].as()); - if (options.count("vertical")) - getClientConfiguration().setBool("vertical", true); - if (options.count("stacktrace")) - getClientConfiguration().setBool("stacktrace", true); - if (options.count("print-profile-events")) - getClientConfiguration().setBool("print-profile-events", true); - if (options.count("profile-events-delay-ms")) - getClientConfiguration().setUInt64("profile-events-delay-ms", options["profile-events-delay-ms"].as()); - /// Whether to print the number of processed rows at - if (options.count("processed-rows")) - getClientConfiguration().setBool("print-num-processed-rows", true); - if (options.count("progress")) - { - switch (options["progress"].as()) - { - case DEFAULT: - getClientConfiguration().setString("progress", "default"); - break; - case OFF: - getClientConfiguration().setString("progress", "off"); - break; - case TTY: - getClientConfiguration().setString("progress", "tty"); - break; - case ERR: - getClientConfiguration().setString("progress", "err"); - break; - } - } - if (options.count("echo")) - getClientConfiguration().setBool("echo", true); - if (options.count("disable_suggestion")) - getClientConfiguration().setBool("disable_suggestion", true); - if (options.count("wait_for_suggestions_to_load")) - getClientConfiguration().setBool("wait_for_suggestions_to_load", true); - if (options.count("suggestion_limit")) - getClientConfiguration().setInt("suggestion_limit", options["suggestion_limit"].as()); - if (options.count("highlight")) - getClientConfiguration().setBool("highlight", options["highlight"].as()); - if (options.count("history_file")) - getClientConfiguration().setString("history_file", options["history_file"].as()); - if (options.count("interactive")) - getClientConfiguration().setBool("interactive", true); - if (options.count("pager")) - 
getClientConfiguration().setString("pager", options["pager"].as()); - - if (options.count("log-level")) - Poco::Logger::root().setLevel(options["log-level"].as()); - if (options.count("server_logs_file")) - server_logs_file = options["server_logs_file"].as(); - - query_processing_stage = QueryProcessingStage::fromString(options["stage"].as()); - query_kind = parseQueryKind(options["query_kind"].as()); - profile_events.print = options.count("print-profile-events"); - profile_events.delay_ms = options["profile-events-delay-ms"].as(); - - processOptions(options_description, options, external_tables_arguments, hosts_and_ports_arguments); - { - std::unordered_set alias_names; - alias_names.reserve(options_description.main_description->options().size()); - for (const auto& option : options_description.main_description->options()) - alias_names.insert(option->long_name()); - argsToConfig(common_arguments, getClientConfiguration(), 100, &alias_names); - } - - clearPasswordFromCommandLine(argc, argv); - - /// Limit on total memory usage - std::string max_client_memory_usage = getClientConfiguration().getString("max_memory_usage_in_client", "0" /*default value*/); - if (max_client_memory_usage != "0") - { - UInt64 max_client_memory_usage_int = parseWithSizeSuffix(max_client_memory_usage.c_str(), max_client_memory_usage.length()); - - total_memory_tracker.setHardLimit(max_client_memory_usage_int); - total_memory_tracker.setDescription("(total)"); - total_memory_tracker.setMetric(CurrentMetrics::MemoryTracking); - } - - /// Print stacktrace in case of crash - HandledSignals::instance().setupTerminateHandler(); - HandledSignals::instance().setupCommonDeadlySignalHandlers(); - /// We don't setup signal handlers for SIGINT, SIGQUIT, SIGTERM because we don't - /// have an option for client to shutdown gracefully. - - fatal_channel_ptr = new Poco::SplitterChannel; - fatal_console_channel_ptr = new Poco::ConsoleChannel; - fatal_channel_ptr->addChannel(fatal_console_channel_ptr); - if (options.count("client_logs_file")) - { - fatal_file_channel_ptr = new Poco::SimpleFileChannel(options["client_logs_file"].as()); - fatal_channel_ptr->addChannel(fatal_file_channel_ptr); - } - - fatal_log = createLogger("ClientBase", fatal_channel_ptr.get(), Poco::Message::PRIO_FATAL); - signal_listener = std::make_unique(nullptr, fatal_log); - signal_listener_thread.start(*signal_listener); - -#if USE_GWP_ASAN - GWPAsan::initFinished(); -#endif - -} - } diff --git a/src/Client/ClientBase.h b/src/Client/ClientBase.h index 7689744a373..557ac30d27c 100644 --- a/src/Client/ClientBase.h +++ b/src/Client/ClientBase.h @@ -1,26 +1,30 @@ #pragma once -#include -#include "Common/NamePrompter.h" -#include -#include + +#include +#include #include +#include #include #include -#include #include #include -#include #include #include #include #include -#include -#include -#include -#include -#include +#include +#include #include +#include +#include + +#include + +#include +#include +#include +#include namespace po = boost::program_options; @@ -64,9 +68,16 @@ std::istream& operator>> (std::istream & in, ProgressOption & progress); class InternalTextLogs; class WriteBufferFromFileDescriptor; -class ClientBase : public Poco::Util::Application, public IHints<2> +/** + * The base class which encapsulates the core functionality of a client. + * Can be used in a standalone application (clickhouse-client or clickhouse-local), + * or be embedded into server. + * Always keep in mind that there can be several instances of this class within + * a process. 
Thus, it cannot keep its state in global shared variables or even use them. + * The best example - std::cin, std::cout and std::cerr. + */ +class ClientBase { - public: using Arguments = std::vector; @@ -79,12 +90,11 @@ public: std::ostream & output_stream_ = std::cout, std::ostream & error_stream_ = std::cerr ); + virtual ~ClientBase(); - ~ClientBase() override; + bool tryStopQuery() { return query_interrupt_handler.tryStop(); } + void stopQuery() { return query_interrupt_handler.stop(); } - void init(int argc, char ** argv); - - std::vector getAllRegisteredNames() const override { return cmd_options; } ASTPtr parseQuery(const char *& pos, const char * end, const Settings & settings, bool allow_multi_statements); protected: @@ -114,7 +124,7 @@ protected: ASTPtr parsed_query, std::optional echo_query_ = {}, bool report_error = false); static void adjustQueryEnd(const char *& this_query_end, const char * all_queries_end, uint32_t max_parser_depth, uint32_t max_parser_backtracks); - static void setupSignalHandler(); + virtual void setupSignalHandler() = 0; bool executeMultiQuery(const String & all_queries_text); MultiQueryProcessingStage analyzeMultiQueryText( @@ -188,7 +198,6 @@ private: String prompt() const; void resetOutput(); - void parseAndCheckOptions(OptionsDescription & options_description, po::variables_map & options, Arguments & arguments); void updateSuggest(const ASTPtr & ast); @@ -196,6 +205,31 @@ private: bool addMergeTreeSettings(ASTCreateQuery & ast_create); protected: + + class QueryInterruptHandler : private boost::noncopyable + { + public: + /// Store how much interrupt signals can be before stopping the query + /// by default stop after the first interrupt signal. + void start(Int32 signals_before_stop = 1) { exit_after_signals.store(signals_before_stop); } + + /// Set value not greater then 0 to mark the query as stopped. + void stop() { exit_after_signals.store(0); } + + /// Return true if the query was stopped. + /// Query was stopped if it received at least "signals_before_stop" interrupt signals. + bool tryStop() { return exit_after_signals.fetch_sub(1) <= 0; } + bool cancelled() { return exit_after_signals.load() <= 0; } + + /// Return how much interrupt signals remain before stop. + Int32 cancelled_status() { return exit_after_signals.load(); } + + private: + std::atomic exit_after_signals = 0; + }; + + QueryInterruptHandler query_interrupt_handler; + static bool isSyncInsertWithData(const ASTInsertQuery & insert_query, const ContextPtr & context); bool processMultiQueryFromFile(const String & file_name); @@ -239,7 +273,6 @@ protected: std::vector queries; /// Queries passed via '--query' std::vector queries_files; /// If not empty, queries will be read from these files std::vector interleave_queries_files; /// If not empty, run queries from these files before processing every file from 'queries_files'. - std::vector cmd_options; bool stdin_is_a_tty = false; /// stdin is a terminal. bool stdout_is_a_tty = false; /// stdout is a terminal. 
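
For reference, a minimal standalone sketch of the counting scheme that the now member-scoped QueryInterruptHandler relies on; InterruptCounter and the main() driver below are illustrative stand-ins, not part of the patch. The signal handler consumes one signal per tryStop() call and forces a hard exit only once the budget set by start() is exhausted, while the query loop polls cancelled() to react to the first Ctrl+C gracefully.

#include <atomic>
#include <cstdint>
#include <iostream>

/// Stand-in for the member-scoped QueryInterruptHandler: a budget of
/// interrupt signals that may arrive before the query has to be stopped.
class InterruptCounter
{
public:
    void start(int32_t signals_before_stop = 1) { exit_after_signals.store(signals_before_stop); }
    void stop() { exit_after_signals.store(0); }

    /// Called from the signal handler: consumes one signal and reports
    /// whether the budget is already exhausted (then the process hard-exits).
    bool tryStop() { return exit_after_signals.fetch_sub(1) <= 0; }

    /// Polled by the query loop to cancel the query gracefully.
    bool cancelled() const { return exit_after_signals.load() <= 0; }

private:
    std::atomic<int32_t> exit_after_signals{0};
};

int main()
{
    InterruptCounter handler;
    handler.start();                            /// arm before running a query

    std::cout << handler.tryStop() << '\n';     /// 0: the first signal does not force an exit,
    std::cout << handler.cancelled() << '\n';   /// 1: but the query loop now sees the cancellation
    std::cout << handler.tryStop() << '\n';     /// 1: a second signal would force a hard exit
}

Keeping this counter as an instance member rather than a file-scope atomic is what allows several client objects to coexist in one process, as the class comment above states.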
From 9186e647eb672283b8cb95d2fc152e0994f6df6f Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Mon, 15 Jul 2024 17:05:09 +0000 Subject: [PATCH 408/661] Fix style --- src/Client/ClientBase.cpp | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 9cf3b955d26..56685f9d3f4 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -88,12 +88,6 @@ namespace fs = std::filesystem; using namespace std::literals; - -namespace CurrentMetrics -{ - extern const Metric MemoryTracking; -} - namespace DB { From af2c9fcaaf4e2f38c9db105c246ee24b095b256f Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Mon, 15 Jul 2024 18:18:42 +0000 Subject: [PATCH 409/661] Skip file --- utils/check-style/check-style | 1 + 1 file changed, 1 insertion(+) diff --git a/utils/check-style/check-style b/utils/check-style/check-style index 380656cd1ca..3c959617d02 100755 --- a/utils/check-style/check-style +++ b/utils/check-style/check-style @@ -318,6 +318,7 @@ std_cerr_cout_excludes=( src/Interpreters/Context.cpp # IProcessor::dump() src/Processors/IProcessor.cpp + src/Client/ClientApplicationBase.cpp src/Client/ClientBase.cpp src/Client/LineReader.cpp src/Client/QueryFuzzer.cpp From 8ba85074e74f403b3d5106f6ef811019075cefb4 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Fri, 19 Jul 2024 12:25:17 +0000 Subject: [PATCH 410/661] Fix build --- src/Client/ClientApplicationBase.cpp | 16 +++++++++++++++- src/Client/ClientApplicationBase.h | 11 +++++++++++ src/Client/ClientBase.cpp | 14 +------------- src/Client/ClientBase.h | 10 ---------- 4 files changed, 27 insertions(+), 24 deletions(-) diff --git a/src/Client/ClientApplicationBase.cpp b/src/Client/ClientApplicationBase.cpp index 59c98983694..6b19898ef5c 100644 --- a/src/Client/ClientApplicationBase.cpp +++ b/src/Client/ClientApplicationBase.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include "config.h" @@ -51,7 +52,20 @@ void interruptSignalHandler(int signum) safeExit(128 + signum); } -ClientApplicationBase::~ClientApplicationBase() = default; +ClientApplicationBase::~ClientApplicationBase() +{ + try + { + writeSignalIDtoSignalPipe(SignalListener::StopThread); + signal_listener_thread.join(); + HandledSignals::instance().reset(); + } + catch (...) 
+ { + tryLogCurrentException(__PRETTY_FUNCTION__); + } +} + ClientApplicationBase::ClientApplicationBase() : ClientBase(STDIN_FILENO, STDOUT_FILENO, STDERR_FILENO, std::cin, std::cout, std::cerr) {} ClientApplicationBase & ClientApplicationBase::getInstance() diff --git a/src/Client/ClientApplicationBase.h b/src/Client/ClientApplicationBase.h index 217fa29c3f4..771bb948cb7 100644 --- a/src/Client/ClientApplicationBase.h +++ b/src/Client/ClientApplicationBase.h @@ -6,6 +6,10 @@ #include #include #include +#include +#include +#include + #include #include @@ -48,6 +52,13 @@ private: void parseAndCheckOptions(OptionsDescription & options_description, po::variables_map & options, Arguments & arguments); std::vector cmd_options; + + LoggerPtr fatal_log; + Poco::AutoPtr fatal_channel_ptr; + Poco::AutoPtr fatal_console_channel_ptr; + Poco::AutoPtr fatal_file_channel_ptr; + Poco::Thread signal_listener_thread; + std::unique_ptr signal_listener; }; diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 56685f9d3f4..85dfb767e75 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -258,19 +258,7 @@ public: }; -ClientBase::~ClientBase() -{ - try - { - writeSignalIDtoSignalPipe(SignalListener::StopThread); - signal_listener_thread.join(); - HandledSignals::instance().reset(); - } - catch (...) - { - tryLogCurrentException(__PRETTY_FUNCTION__); - } -} +ClientBase::~ClientBase() = default; ClientBase::ClientBase( int in_fd_, diff --git a/src/Client/ClientBase.h b/src/Client/ClientBase.h index 557ac30d27c..304d8c4b890 100644 --- a/src/Client/ClientBase.h +++ b/src/Client/ClientBase.h @@ -9,9 +9,6 @@ #include #include #include -#include -#include -#include #include #include #include @@ -253,13 +250,6 @@ protected: /// Client context is a context used only by the client to parse queries, process query parameters and to connect to clickhouse-server. ContextMutablePtr client_context; - LoggerPtr fatal_log; - Poco::AutoPtr fatal_channel_ptr; - Poco::AutoPtr fatal_console_channel_ptr; - Poco::AutoPtr fatal_file_channel_ptr; - Poco::Thread signal_listener_thread; - std::unique_ptr signal_listener; - bool is_interactive = false; /// Use either interactive line editing interface or batch mode. bool delayed_interactive = false; From 49dc30d5c28392d361ce0ef1e18f7db73841617f Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Wed, 24 Jul 2024 21:42:36 +0000 Subject: [PATCH 411/661] Small adjustement --- src/Client/ClientApplicationBase.cpp | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/Client/ClientApplicationBase.cpp b/src/Client/ClientApplicationBase.cpp index 6b19898ef5c..0306468d084 100644 --- a/src/Client/ClientApplicationBase.cpp +++ b/src/Client/ClientApplicationBase.cpp @@ -48,8 +48,13 @@ static ClientInfo::QueryKind parseQueryKind(const String & query_kind) /// This signal handler is set only for SIGINT and SIGQUIT. void interruptSignalHandler(int signum) { - if (ClientApplicationBase::getInstance().tryStopQuery()) - safeExit(128 + signum); + /// Signal handler might be called even before the setup is fully finished + /// and client application started to process the query. + /// Because of that we have to manually check it. 
+ if (auto * instance = ClientApplicationBase::instanceRawPtr(); instance) + if (auto * base = dynamic_cast(instance); base) + if (base->tryStopQuery()) + safeExit(128 + signum); } ClientApplicationBase::~ClientApplicationBase() From c7c1f10720cd194d85de6d81156cbd37304ab52b Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Wed, 24 Jul 2024 21:45:16 +0000 Subject: [PATCH 412/661] Added new method --- base/poco/Util/include/Poco/Util/Application.h | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/base/poco/Util/include/Poco/Util/Application.h b/base/poco/Util/include/Poco/Util/Application.h index c8d18e1bce9..d1a2021eb67 100644 --- a/base/poco/Util/include/Poco/Util/Application.h +++ b/base/poco/Util/include/Poco/Util/Application.h @@ -261,6 +261,11 @@ namespace Util /// /// Throws a NullPointerException if no Application instance exists. + static Application * instanceRawPtr(); + /// Returns a raw pointer to the Application sigleton. + /// + /// The caller should check whether the result is nullptr. + const Poco::Timestamp & startTime() const; /// Returns the application start time (UTC). @@ -448,6 +453,12 @@ namespace Util } + inline Application * Application::instanceRawPtr() + { + return _pInstance; + } + + inline const Poco::Timestamp & Application::startTime() const { return _startTime; From 6f068639db627944aaab978c79866ad5a2a234e7 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Thu, 25 Jul 2024 16:38:26 +0000 Subject: [PATCH 413/661] Better --- src/Client/ClientApplicationBase.h | 1 - src/Client/ClientBase.h | 5 +++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/Client/ClientApplicationBase.h b/src/Client/ClientApplicationBase.h index 771bb948cb7..3663271dd25 100644 --- a/src/Client/ClientApplicationBase.h +++ b/src/Client/ClientApplicationBase.h @@ -5,7 +5,6 @@ #include #include #include -#include #include #include #include diff --git a/src/Client/ClientBase.h b/src/Client/ClientBase.h index 304d8c4b890..175ebe97075 100644 --- a/src/Client/ClientBase.h +++ b/src/Client/ClientBase.h @@ -2,6 +2,7 @@ #include +#include #include #include #include @@ -9,9 +10,13 @@ #include #include #include +#include +#include +#include #include #include #include + #include #include #include From a457db34b216ea987a4875a3cbeb5363878d5d5a Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Thu, 25 Jul 2024 16:58:41 +0000 Subject: [PATCH 414/661] Fixed a typo --- base/poco/Util/include/Poco/Util/Application.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/base/poco/Util/include/Poco/Util/Application.h b/base/poco/Util/include/Poco/Util/Application.h index d1a2021eb67..786e331fe73 100644 --- a/base/poco/Util/include/Poco/Util/Application.h +++ b/base/poco/Util/include/Poco/Util/Application.h @@ -262,7 +262,7 @@ namespace Util /// Throws a NullPointerException if no Application instance exists. static Application * instanceRawPtr(); - /// Returns a raw pointer to the Application sigleton. + /// Returns a raw pointer to the Application singleton. /// /// The caller should check whether the result is nullptr. 
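
A rough standalone sketch of the defensive pattern behind instanceRawPtr() and the reworked interruptSignalHandler; App and ClientApp are made-up stand-ins for Poco::Util::Application and ClientApplicationBase, and the handler is invoked directly here instead of being installed with sigaction. The point it illustrates: a signal can arrive before the singleton exists, so the handler checks both the raw pointer and the dynamic type before touching any query state.

#include <csignal>
#include <cstdlib>
#include <iostream>

struct App                                      /// stand-in for Poco::Util::Application
{
    virtual ~App() = default;
    static App * instanceRawPtr() { return registered_instance; }   /// may return nullptr
    static App * registered_instance;
};
App * App::registered_instance = nullptr;

struct ClientApp : App                          /// stand-in for ClientApplicationBase
{
    bool tryStopQuery() { return true; }
};

void interruptSignalHandler(int signum)
{
    /// Both checks can legitimately fail: before init() there is no singleton
    /// at all, and the running application may not be a client application.
    if (auto * instance = App::instanceRawPtr(); instance)
        if (auto * client = dynamic_cast<ClientApp *>(instance); client)
            if (client->tryStopQuery())
                std::_Exit(128 + signum);
}

int main()
{
    interruptSignalHandler(SIGINT);             /// safe no-op: no instance registered yet

    ClientApp app;
    App::registered_instance = &app;
    std::cout << "instance registered" << std::endl;
    interruptSignalHandler(SIGINT);             /// now exits with code 128 + SIGINT
}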
From 2f255dc68d2cffb8bc17efc153a9e75a9166675d Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Fri, 26 Jul 2024 01:11:40 +0200 Subject: [PATCH 415/661] Fix clang-tidy --- src/Client/ClientBase.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Client/ClientBase.h b/src/Client/ClientBase.h index 175ebe97075..1a23b6b1363 100644 --- a/src/Client/ClientBase.h +++ b/src/Client/ClientBase.h @@ -95,7 +95,7 @@ public: virtual ~ClientBase(); bool tryStopQuery() { return query_interrupt_handler.tryStop(); } - void stopQuery() { return query_interrupt_handler.stop(); } + void stopQuery() { query_interrupt_handler.stop(); } ASTPtr parseQuery(const char *& pos, const char * end, const Settings & settings, bool allow_multi_statements); From 9b4accebb3d19789e69b422ae2f235149e453f94 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Mon, 29 Jul 2024 15:47:52 +0000 Subject: [PATCH 416/661] Fix build --- src/Client/ClientApplicationBase.cpp | 162 ------------------------ src/Client/ClientBaseOptimizedParts.cpp | 4 +- 2 files changed, 2 insertions(+), 164 deletions(-) diff --git a/src/Client/ClientApplicationBase.cpp b/src/Client/ClientApplicationBase.cpp index 0306468d084..4aa8b6c0bbe 100644 --- a/src/Client/ClientApplicationBase.cpp +++ b/src/Client/ClientApplicationBase.cpp @@ -102,168 +102,6 @@ void ClientApplicationBase::setupSignalHandler() throw ErrnoException(ErrorCodes::CANNOT_SET_SIGNAL_HANDLER, "Cannot set signal handler"); } - -namespace -{ - -/// Define transparent hash to we can use -/// std::string_view with the containers -struct TransparentStringHash -{ - using is_transparent = void; - size_t operator()(std::string_view txt) const - { - return std::hash{}(txt); - } -}; - -/* - * This functor is used to parse command line arguments and replace dashes with underscores, - * allowing options to be specified using either dashes or underscores. 
- */ -class OptionsAliasParser -{ -public: - explicit OptionsAliasParser(const boost::program_options::options_description& options) - { - options_names.reserve(options.options().size()); - for (const auto& option : options.options()) - options_names.insert(option->long_name()); - } - - /* - * Parses arguments by replacing dashes with underscores, and matches the resulting name with known options - * Implements boost::program_options::ext_parser logic - */ - std::pair operator()(const std::string & token) const - { - if (!token.starts_with("--")) - return {}; - std::string arg = token.substr(2); - - // divide token by '=' to separate key and value if options style=long_allow_adjacent - auto pos_eq = arg.find('='); - std::string key = arg.substr(0, pos_eq); - - if (options_names.contains(key)) - // option does not require any changes, because it is already correct - return {}; - - std::replace(key.begin(), key.end(), '-', '_'); - if (!options_names.contains(key)) - // after replacing '-' with '_' argument is still unknown - return {}; - - std::string value; - if (pos_eq != std::string::npos && pos_eq < arg.size()) - value = arg.substr(pos_eq + 1); - - return {key, value}; - } - -private: - std::unordered_set options_names; -}; - -} - -/// Enable optimizations even in debug builds because otherwise options parsing becomes extremely slow affecting .sh tests -#if defined(__clang__) -#pragma clang optimize on -#endif -void ClientApplicationBase::parseAndCheckOptions(OptionsDescription & options_description, po::variables_map & options, Arguments & arguments) -{ - if (allow_repeated_settings) - addProgramOptionsAsMultitokens(cmd_settings, options_description.main_description.value()); - else - addProgramOptions(cmd_settings, options_description.main_description.value()); - - if (allow_merge_tree_settings) - { - /// Add merge tree settings manually, because names of some settings - /// may clash. Query settings have higher priority and we just - /// skip ambiguous merge tree settings. - auto & main_options = options_description.main_description.value(); - - std::unordered_set> main_option_names; - for (const auto & option : main_options.options()) - main_option_names.insert(option->long_name()); - - for (const auto & setting : cmd_merge_tree_settings.all()) - { - const auto add_setting = [&](const std::string_view name) - { - if (auto it = main_option_names.find(name); it != main_option_names.end()) - return; - - if (allow_repeated_settings) - addProgramOptionAsMultitoken(cmd_merge_tree_settings, main_options, name, setting); - else - addProgramOption(cmd_merge_tree_settings, main_options, name, setting); - }; - - const auto & setting_name = setting.getName(); - - add_setting(setting_name); - - const auto & settings_to_aliases = MergeTreeSettings::Traits::settingsToAliases(); - if (auto it = settings_to_aliases.find(setting_name); it != settings_to_aliases.end()) - { - for (const auto alias : it->second) - { - add_setting(alias); - } - } - } - } - - /// Parse main commandline options. - auto parser = po::command_line_parser(arguments) - .options(options_description.main_description.value()) - .extra_parser(OptionsAliasParser(options_description.main_description.value())) - .allow_unregistered(); - po::parsed_options parsed = parser.run(); - - /// Check unrecognized options without positional options. 
- auto unrecognized_options = po::collect_unrecognized(parsed.options, po::collect_unrecognized_mode::exclude_positional); - if (!unrecognized_options.empty()) - { - auto hints = this->getHints(unrecognized_options[0]); - if (!hints.empty()) - throw Exception(ErrorCodes::UNRECOGNIZED_ARGUMENTS, "Unrecognized option '{}'. Maybe you meant {}", - unrecognized_options[0], toString(hints)); - - throw Exception(ErrorCodes::UNRECOGNIZED_ARGUMENTS, "Unrecognized option '{}'", unrecognized_options[0]); - } - - /// Check positional options. - for (const auto & op : parsed.options) - { - if (!op.unregistered && op.string_key.empty() && !op.original_tokens[0].starts_with("--") - && !op.original_tokens[0].empty() && !op.value.empty()) - { - /// Two special cases for better usability: - /// - if the option contains a whitespace, it might be a query: clickhouse "SELECT 1" - /// These are relevant for interactive usage - user-friendly, but questionable in general. - /// In case of ambiguity or for scripts, prefer using proper options. - - const auto & token = op.original_tokens[0]; - po::variable_value value(boost::any(op.value), false); - - const char * option; - if (token.contains(' ')) - option = "query"; - else - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Positional option `{}` is not supported.", token); - - if (!options.emplace(option, value).second) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Positional option `{}` is not supported.", token); - } - } - - po::store(parsed, options); -} - void ClientApplicationBase::addMultiquery(std::string_view query, Arguments & common_arguments) const { common_arguments.emplace_back("--multiquery"); diff --git a/src/Client/ClientBaseOptimizedParts.cpp b/src/Client/ClientBaseOptimizedParts.cpp index 421843a0e79..297b8e7ce51 100644 --- a/src/Client/ClientBaseOptimizedParts.cpp +++ b/src/Client/ClientBaseOptimizedParts.cpp @@ -1,4 +1,4 @@ -#include +#include #include namespace DB @@ -80,7 +80,7 @@ private: } -void ClientBase::parseAndCheckOptions(OptionsDescription & options_description, po::variables_map & options, Arguments & arguments) +void ClientApplicationBase::parseAndCheckOptions(OptionsDescription & options_description, po::variables_map & options, Arguments & arguments) { if (allow_repeated_settings) addProgramOptionsAsMultitokens(cmd_settings, options_description.main_description.value()); From 6f16ca02a74a81956b7524958c5f94eb7a2c7bf5 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Mon, 29 Jul 2024 16:14:31 +0000 Subject: [PATCH 417/661] Fixed Style Check --- src/Client/ClientApplicationBase.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Client/ClientApplicationBase.cpp b/src/Client/ClientApplicationBase.cpp index 4aa8b6c0bbe..9f133616d2e 100644 --- a/src/Client/ClientApplicationBase.cpp +++ b/src/Client/ClientApplicationBase.cpp @@ -31,7 +31,6 @@ namespace ErrorCodes { extern const int BAD_ARGUMENTS; extern const int CANNOT_SET_SIGNAL_HANDLER; - extern const int UNRECOGNIZED_ARGUMENTS; } static ClientInfo::QueryKind parseQueryKind(const String & query_kind) From 75728ac56d83b85e476162a745686837cb194b73 Mon Sep 17 00:00:00 2001 From: Halersson Paris <142428374+halersson@users.noreply.github.com> Date: Mon, 29 Jul 2024 14:42:58 -0300 Subject: [PATCH 418/661] Fix typo --- src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp index e837d4d5e20..bc5e8292192 
100644 --- a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp @@ -420,7 +420,7 @@ void ParquetBlockInputFormat::initializeIfNeeded() int num_row_groups = metadata->num_row_groups(); row_group_batches.reserve(num_row_groups); - auto adative_chunk_size = [&](int row_group_idx) -> size_t + auto adaptive_chunk_size = [&](int row_group_idx) -> size_t { size_t total_size = 0; auto row_group_meta = metadata->RowGroup(row_group_idx); @@ -457,7 +457,7 @@ void ParquetBlockInputFormat::initializeIfNeeded() row_group_batches.back().row_groups_idxs.push_back(row_group); row_group_batches.back().total_rows += metadata->RowGroup(row_group)->num_rows(); row_group_batches.back().total_bytes_compressed += metadata->RowGroup(row_group)->total_compressed_size(); - auto rows = adative_chunk_size(row_group); + auto rows = adaptive_chunk_size(row_group); row_group_batches.back().adaptive_chunk_size = rows ? rows : format_settings.parquet.max_block_size; } } From 06b3185e57953c43531e4281f22bc03a8cd424e7 Mon Sep 17 00:00:00 2001 From: Ilya Yatsishin <2159081+qoega@users.noreply.github.com> Date: Mon, 29 Jul 2024 20:05:29 +0200 Subject: [PATCH 419/661] fixes --- src/Interpreters/InterpreterCreateQuery.cpp | 14 +- .../test_restore_external_engines/test.py | 133 ++++++++++++++---- 2 files changed, 111 insertions(+), 36 deletions(-) diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index a5eb3a83365..6e689c59c09 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -1031,13 +1031,6 @@ void InterpreterCreateQuery::setEngine(ASTCreateQuery & create) const /// Some part of storage definition (such as PARTITION BY) is specified, but ENGINE is not: just set default one. setDefaultTableEngine(*to_engine, getContext()->getSettingsRef().default_table_engine.value); } - /// For external tables with restore_replace_external_engine_to_null setting we replace external engines to - /// Null table engine. - else if (getContext()->getSettingsRef().restore_replace_external_engines_to_null) - { - if (StorageFactory::instance().getStorageFeatures(create.storage->engine->name).source_access_type != AccessType::NONE) - setNullTableEngine(*create.storage); - } return; } } @@ -1050,6 +1043,13 @@ void InterpreterCreateQuery::setEngine(ASTCreateQuery & create) const /// Some part of storage definition (such as PARTITION BY) is specified, but ENGINE is not: just set default one. setDefaultTableEngine(*create.storage, getContext()->getSettingsRef().default_table_engine.value); } + /// For external tables with restore_replace_external_engine_to_null setting we replace external engines to + /// Null table engine. 
+ else if (getContext()->getSettingsRef().restore_replace_external_engines_to_null) + { + if (StorageFactory::instance().getStorageFeatures(create.storage->engine->name).source_access_type != AccessType::NONE) + setNullTableEngine(*create.storage); + } return; } diff --git a/tests/integration/test_restore_external_engines/test.py b/tests/integration/test_restore_external_engines/test.py index eb88da6b61f..cf189f2a6ed 100644 --- a/tests/integration/test_restore_external_engines/test.py +++ b/tests/integration/test_restore_external_engines/test.py @@ -7,52 +7,75 @@ from helpers.cluster import ClickHouseCluster cluster = ClickHouseCluster(__file__) configs = ["configs/remote_servers.xml", "configs/backups_disk.xml"] -node1 = cluster.add_instance("replica1", with_zookeeper=True, with_mysql8=True, main_configs=configs, external_dirs=["/backups/"]) -node2 = cluster.add_instance("replica2", with_zookeeper=True, with_mysql8=True, main_configs=configs, external_dirs=["/backups/"]) -node3 = cluster.add_instance("replica3", with_zookeeper=True, with_mysql8=True, main_configs=configs, external_dirs=["/backups/"]) +node1 = cluster.add_instance( + "replica1", + with_zookeeper=True, + with_mysql8=True, + main_configs=configs, + external_dirs=["/backups/"], +) +node2 = cluster.add_instance( + "replica2", + with_zookeeper=True, + with_mysql8=True, + main_configs=configs, + external_dirs=["/backups/"], +) +node3 = cluster.add_instance( + "replica3", + with_zookeeper=True, + with_mysql8=True, + main_configs=configs, + external_dirs=["/backups/"], +) nodes = [node1, node2, node3] backup_id_counter = 0 + def new_backup_name(): global backup_id_counter backup_id_counter += 1 return f"Disk('backups', '{backup_id_counter}/')" + def cleanup_nodes(nodes, dbname): for node in nodes: node.query(f"DROP DATABASE IF EXISTS {dbname} SYNC") + def fill_nodes(nodes, dbname): cleanup_nodes(nodes, dbname) for node in nodes: - node.query(f"CREATE DATABASE {dbname} ENGINE = Replicated('/clickhouse/databases/{dbname}', 'default', '{node.name}')") + node.query( + f"CREATE DATABASE {dbname} ENGINE = Replicated('/clickhouse/databases/{dbname}', 'default', '{node.name}')" + ) + def drop_mysql_table(conn, tableName): with conn.cursor() as cursor: cursor.execute(f"DROP TABLE IF EXISTS `clickhouse`.`{tableName}`") + def get_mysql_conn(cluster): conn = pymysql.connect( - user="root", password="clickhouse", host=cluster.mysql8_ip, port=cluster.mysql8_port + user="root", + password="clickhouse", + host=cluster.mysql8_ip, + port=cluster.mysql8_port, ) return conn + def fill_tables(cluster, dbname): fill_nodes(nodes, dbname) conn = get_mysql_conn(cluster) with conn.cursor() as cursor: - cursor.execute( - "DROP DATABASE IF EXISTS clickhouse" - ) - cursor.execute( - "CREATE DATABASE clickhouse" - ) - cursor.execute( - "DROP TABLE IF EXISTS clickhouse.inference_table" - ) + cursor.execute("DROP DATABASE IF EXISTS clickhouse") + cursor.execute("CREATE DATABASE clickhouse") + cursor.execute("DROP TABLE IF EXISTS clickhouse.inference_table") cursor.execute( "CREATE TABLE clickhouse.inference_table (id INT PRIMARY KEY, data BINARY(16) NOT NULL)" ) @@ -66,17 +89,30 @@ def fill_tables(cluster, dbname): node1.query( f"CREATE TABLE {dbname}.mysql_schema_inference_engine ENGINE=MySQL({parameters})" ) - node1.query(f"CREATE TABLE {dbname}.mysql_schema_inference_function AS mysql({parameters})") + node1.query( + f"CREATE TABLE {dbname}.mysql_schema_inference_function AS mysql({parameters})" + ) node1.query(f"CREATE TABLE {dbname}.merge_tree (id UInt64, b 
String) ORDER BY id") node1.query(f"INSERT INTO {dbname}.merge_tree VALUES (100, 'abc')") expected = "id\tInt32\t\t\t\t\t\ndata\tFixedString(16)\t\t\t\t\t\n" - assert node1.query(f"DESCRIBE TABLE {dbname}.mysql_schema_inference_engine") == expected - assert node1.query(f"DESCRIBE TABLE {dbname}.mysql_schema_inference_function") == expected + assert ( + node1.query(f"DESCRIBE TABLE {dbname}.mysql_schema_inference_engine") + == expected + ) + assert ( + node1.query(f"DESCRIBE TABLE {dbname}.mysql_schema_inference_function") + == expected + ) assert node1.query(f"SELECT id FROM mysql({parameters})") == "100\n" - assert node1.query(f"SELECT id FROM {dbname}.mysql_schema_inference_engine") == "100\n" - assert node1.query(f"SELECT id FROM {dbname}.mysql_schema_inference_function") == "100\n" + assert ( + node1.query(f"SELECT id FROM {dbname}.mysql_schema_inference_engine") == "100\n" + ) + assert ( + node1.query(f"SELECT id FROM {dbname}.mysql_schema_inference_function") + == "100\n" + ) assert node1.query(f"SELECT id FROM {dbname}.merge_tree") == "100\n" @@ -92,6 +128,7 @@ def start_cluster(): finally: cluster.shutdown() + def test_restore_table(start_cluster): fill_tables(cluster, "replicated") backup_name = new_backup_name() @@ -107,12 +144,26 @@ def test_restore_table(start_cluster): assert node3.query("EXISTS replicated.mysql_schema_inference_engine") == "0\n" assert node3.query("EXISTS replicated.mysql_schema_inference_function") == "0\n" - node3.query(f"RESTORE DATABASE replicated FROM {backup_name} SETTINGS allow_different_database_def=true") + node3.query( + f"RESTORE DATABASE replicated FROM {backup_name} SETTINGS allow_different_database_def=true" + ) node1.query(f"SYSTEM SYNC DATABASE REPLICA replicated") - assert node1.query("SELECT count(), sum(id) FROM replicated.mysql_schema_inference_engine") == "1\t100\n" - assert node1.query("SELECT count(), sum(id) FROM replicated.mysql_schema_inference_function") == "1\t100\n" - assert node1.query("SELECT count(), sum(id) FROM replicated.merge_tree") == "1\t100\n" + assert ( + node1.query( + "SELECT count(), sum(id) FROM replicated.mysql_schema_inference_engine" + ) + == "1\t100\n" + ) + assert ( + node1.query( + "SELECT count(), sum(id) FROM replicated.mysql_schema_inference_function" + ) + == "1\t100\n" + ) + assert ( + node1.query("SELECT count(), sum(id) FROM replicated.merge_tree") == "1\t100\n" + ) cleanup_nodes(nodes, "replicated") @@ -132,12 +183,36 @@ def test_restore_table_null(start_cluster): assert node3.query("EXISTS replicated2.mysql_schema_inference_engine") == "0\n" assert node3.query("EXISTS replicated2.mysql_schema_inference_function") == "0\n" - node3.query(f"RESTORE DATABASE replicated2 FROM {backup_name} SETTINGS allow_different_database_def=1, allow_different_table_def=1 SETTINGS restore_replace_external_engines_to_null=1, restore_replace_external_table_functions_to_null=1") + node3.query( + f"RESTORE DATABASE replicated2 FROM {backup_name} SETTINGS allow_different_database_def=1, allow_different_table_def=1 SETTINGS restore_replace_external_engines_to_null=1, restore_replace_external_table_functions_to_null=1" + ) node1.query(f"SYSTEM SYNC DATABASE REPLICA replicated2") - assert node1.query("SELECT count(), sum(id) FROM replicated2.mysql_schema_inference_engine") == "0\t0\n" - assert node1.query("SELECT count(), sum(id) FROM replicated2.mysql_schema_inference_function") == "0\t0\n" - assert node1.query("SELECT count(), sum(id) FROM replicated2.merge_tree") == "1\t100\n" - assert node1.query("SELECT engine FROM 
system.tables where database = 'replicated2' and name like '%mysql%'") == "Null\nNull\n" - assert node1.query("SELECT engine FROM system.tables where database = 'replicated2' and name like '%merge_tree%'") == "MergeTree\n" + assert ( + node1.query( + "SELECT count(), sum(id) FROM replicated2.mysql_schema_inference_engine" + ) + == "0\t0\n" + ) + assert ( + node1.query( + "SELECT count(), sum(id) FROM replicated2.mysql_schema_inference_function" + ) + == "0\t0\n" + ) + assert ( + node1.query("SELECT count(), sum(id) FROM replicated2.merge_tree") == "1\t100\n" + ) + assert ( + node1.query( + "SELECT engine FROM system.tables where database = 'replicated2' and name like '%mysql%'" + ) + == "Null\nNull\n" + ) + assert ( + node1.query( + "SELECT engine FROM system.tables where database = 'replicated2' and name like '%merge_tree%'" + ) + == "MergeTree\n" + ) cleanup_nodes(nodes, "replicated2") From cb056cf3a5080cbff61f6efd070733ae2061d5b8 Mon Sep 17 00:00:00 2001 From: Peter Nguyen Date: Mon, 29 Jul 2024 12:05:44 -0700 Subject: [PATCH 420/661] Add camelCase aliases for percentRank() and denseRank() for percent_rank() and dense_rank() --- src/Processors/Transforms/WindowTransform.cpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/Processors/Transforms/WindowTransform.cpp b/src/Processors/Transforms/WindowTransform.cpp index 2b255c5120e..a1b46c8e36c 100644 --- a/src/Processors/Transforms/WindowTransform.cpp +++ b/src/Processors/Transforms/WindowTransform.cpp @@ -2721,20 +2721,24 @@ void registerWindowFunctions(AggregateFunctionFactory & factory) parameters); }, properties}, AggregateFunctionFactory::Case::Insensitive); - factory.registerFunction("dense_rank", {[](const std::string & name, + factory.registerFunction("denseRank", {[](const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings *) { return std::make_shared(name, argument_types, parameters); }, properties}, AggregateFunctionFactory::Case::Insensitive); - factory.registerFunction("percent_rank", {[](const std::string & name, + factory.registerAlias("dense_rank", "denseRank", AggregateFunctionFactory::Case::Sensitive); + + factory.registerFunction("percentRank", {[](const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings *) { return std::make_shared(name, argument_types, parameters); }, properties}, AggregateFunctionFactory::Case::Insensitive); + factory.registerAlias("percent_rank", "percentRank", AggregateFunctionFactory::Case::Sensitive); + factory.registerFunction("row_number", {[](const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings *) { From 9811a2e71b825a55c376edfb38303c817493cd9e Mon Sep 17 00:00:00 2001 From: Peter Nguyen Date: Sun, 28 Jul 2024 17:32:32 -0700 Subject: [PATCH 421/661] Add test 03213_denseRank_percentRank_alias --- ...3213_denseRank_percentRank_alias.reference | 45 ++++++++++++++ .../03213_denseRank_percentRank_alias.sql | 59 +++++++++++++++++++ 2 files changed, 104 insertions(+) create mode 100644 tests/queries/0_stateless/03213_denseRank_percentRank_alias.reference create mode 100644 tests/queries/0_stateless/03213_denseRank_percentRank_alias.sql diff --git a/tests/queries/0_stateless/03213_denseRank_percentRank_alias.reference b/tests/queries/0_stateless/03213_denseRank_percentRank_alias.reference new file mode 100644 index 00000000000..b49e179df68 --- /dev/null +++ b/tests/queries/0_stateless/03213_denseRank_percentRank_alias.reference @@ -0,0 
+1,45 @@ +---- denseRank() ---- +0 0 0 1 1 1 1 +3 0 0 2 2 2 2 +1 0 1 3 3 3 3 +4 0 1 4 4 4 4 +2 0 2 5 5 5 5 +6 1 0 1 1 1 1 +9 1 0 2 2 2 2 +7 1 1 3 3 3 3 +5 1 2 4 4 4 4 +8 1 2 5 5 5 5 +12 2 0 1 1 1 1 +10 2 1 2 2 2 2 +13 2 1 3 3 3 3 +11 2 2 4 4 4 4 +14 2 2 5 5 5 5 +15 3 0 1 1 1 1 +18 3 0 2 2 2 2 +16 3 1 3 3 3 3 +19 3 1 4 4 4 4 +17 3 2 5 5 5 5 +21 4 0 1 1 1 1 +24 4 0 2 2 2 2 +22 4 1 3 3 3 3 +20 4 2 4 4 4 4 +23 4 2 5 5 5 5 +27 5 0 1 1 1 1 +25 5 1 2 2 2 2 +28 5 1 3 3 3 3 +26 5 2 4 4 4 4 +29 5 2 5 5 5 5 +30 6 0 1 1 1 1 +---- percentRank() ---- +Lenovo Thinkpad Laptop 700 1 0 +Sony VAIO Laptop 700 1 0 +Dell Vostro Laptop 800 3 0.6666666666666666 +HP Elite Laptop 1200 4 1 +Microsoft Lumia Smartphone 200 1 0 +HTC One Smartphone 400 2 0.3333333333333333 +Nexus Smartphone 500 3 0.6666666666666666 +iPhone Smartphone 900 4 1 +Kindle Fire Tablet 150 1 0 +Samsung Galaxy Tab Tablet 200 2 0.5 +iPad Tablet 700 3 1 +Others Unknow 200 1 0 diff --git a/tests/queries/0_stateless/03213_denseRank_percentRank_alias.sql b/tests/queries/0_stateless/03213_denseRank_percentRank_alias.sql new file mode 100644 index 00000000000..ff841294eb1 --- /dev/null +++ b/tests/queries/0_stateless/03213_denseRank_percentRank_alias.sql @@ -0,0 +1,59 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/67042 +-- Reference generated using percent_rank() and dense_rank() + +-- From ClickHouse/tests/queries/0_stateless/01591_window_functions.sql (for deterministic query) +SELECT '---- denseRank() ----'; +select number, p, o, + count(*) over w, + rank() over w, + denseRank() over w, + row_number() over w +from (select number, intDiv(number, 5) p, mod(number, 3) o + from numbers(31) order by o, number) t +window w as (partition by p order by o, number) +order by p, o, number +settings max_block_size = 2; + +-- Modifed from ClickHouse/tests/queries/0_stateless/01592_window_functions.sql (for deterministic query) +SELECT '---- percentRank() ----'; + +drop table if exists product_groups; +drop table if exists products; + +CREATE TABLE product_groups ( + group_id Int64, + group_name String +) Engine = Memory; + +CREATE TABLE products ( + product_id Int64, + product_name String, + price DECIMAL(11, 2), + group_id Int64 +) Engine = Memory; + +INSERT INTO product_groups VALUES (1, 'Smartphone'),(2, 'Laptop'),(3, 'Tablet'); +INSERT INTO products (product_id,product_name, group_id,price) VALUES (1, 'Microsoft Lumia', 1, 200), (2, 'HTC One', 1, 400), (3, 'Nexus', 1, 500), (4, 'iPhone', 1, 900),(5, 'HP Elite', 2, 1200),(6, 'Lenovo Thinkpad', 2, 700),(7, 'Sony VAIO', 2, 700),(8, 'Dell Vostro', 2, 800),(9, 'iPad', 3, 700),(10, 'Kindle Fire', 3, 150),(11, 'Samsung Galaxy Tab', 3, 200); +INSERT INTO product_groups VALUES (4, 'Unknow'); +INSERT INTO products (product_id,product_name, group_id,price) VALUES (12, 'Others', 4, 200); + + +SELECT * +FROM +( + SELECT + product_name, + group_name, + price, + rank() OVER (PARTITION BY group_name ORDER BY price ASC) AS rank, + percentRank() OVER (PARTITION BY group_name ORDER BY price ASC) AS percent + FROM products + INNER JOIN product_groups USING (group_id) +) AS t +ORDER BY + group_name ASC, + price ASC, + product_name ASC; + +drop table product_groups; +drop table products; From fea03cf46ff29aa398b08d86ae77361fe85d7c40 Mon Sep 17 00:00:00 2001 From: Max K Date: Mon, 29 Jul 2024 21:07:24 +0200 Subject: [PATCH 422/661] Build results fix --- tests/ci/ci.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/ci/ci.py b/tests/ci/ci.py index e30062c32ff..935fe472e50 100644 --- 
a/tests/ci/ci.py +++ b/tests/ci/ci.py @@ -766,7 +766,9 @@ def _upload_build_artifacts( int(job_report.duration), GITHUB_JOB_API_URL(), head_ref=pr_info.head_ref, - pr_number=pr_info.number, + # PRInfo fetches pr number for release branches as well - set pr_number to 0 for release + # so that build results are not mistakenly treated as feature branch builds + pr_number=pr_info.number if pr_info.is_pr else 0, ) report_url = ci_cache.upload_build_report(build_result) print(f"Report file has been uploaded to [{report_url}]") From 3df2d88cf13ad552058a6958630741d7cdab9d3c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 29 Jul 2024 21:09:11 +0200 Subject: [PATCH 423/661] Update CHANGELOG.md --- CHANGELOG.md | 1 - 1 file changed, 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 07b37835dda..620b7c99bac 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -45,7 +45,6 @@ * Add support for `cluster_for_parallel_replicas` when using custom key parallel replicas. It allows you to use parallel replicas with custom key with MergeTree tables. [#65453](https://github.com/ClickHouse/ClickHouse/pull/65453) ([Antonio Andelic](https://github.com/antonio2368)). #### Performance Improvement -* Enable `optimize_functions_to_subcolumns` by default. [#58661](https://github.com/ClickHouse/ClickHouse/pull/58661) ([Anton Popov](https://github.com/CurtizJ)). * Replace int to string algorithm with a faster one (from a modified amdn/itoa to a modified jeaiii/itoa). [#61661](https://github.com/ClickHouse/ClickHouse/pull/61661) ([Raúl Marín](https://github.com/Algunenano)). * Sizes of hash tables created by join (`parallel_hash` algorithm) is collected and cached now. This information will be used to preallocate space in hash tables for subsequent query executions and save time on hash table resizes. [#64553](https://github.com/ClickHouse/ClickHouse/pull/64553) ([Nikita Taranov](https://github.com/nickitat)). * Optimized queries with `ORDER BY` primary key and `WHERE` that have a condition with high selectivity by using of buffering. It is controlled by setting `read_in_order_use_buffering` (enabled by default) and can increase memory usage of query. [#64607](https://github.com/ClickHouse/ClickHouse/pull/64607) ([Anton Popov](https://github.com/CurtizJ)). 
From 756bde1158c4b3e6e65d324436291d53b9e25fbb Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Mon, 29 Jul 2024 20:27:15 +0100 Subject: [PATCH 424/661] rm file --- tests/performance/views_max_insert_threads.xml | 11 ----------- 1 file changed, 11 deletions(-) delete mode 100644 tests/performance/views_max_insert_threads.xml diff --git a/tests/performance/views_max_insert_threads.xml b/tests/performance/views_max_insert_threads.xml deleted file mode 100644 index 473bcd02ab8..00000000000 --- a/tests/performance/views_max_insert_threads.xml +++ /dev/null @@ -1,11 +0,0 @@ - - - - - - - - - - - From 2308a362a0aca716e2e50d3eb5283bdfd575e023 Mon Sep 17 00:00:00 2001 From: Michael Kolupaev Date: Mon, 29 Jul 2024 19:31:46 +0000 Subject: [PATCH 425/661] Disable 02932_refreshable_materialized_views --- .../02932_refreshable_materialized_views.sh | 304 +----------------- 1 file changed, 1 insertion(+), 303 deletions(-) diff --git a/tests/queries/0_stateless/02932_refreshable_materialized_views.sh b/tests/queries/0_stateless/02932_refreshable_materialized_views.sh index 9081035579d..6df3c391ddb 100755 --- a/tests/queries/0_stateless/02932_refreshable_materialized_views.sh +++ b/tests/queries/0_stateless/02932_refreshable_materialized_views.sh @@ -1,306 +1,4 @@ #!/usr/bin/env bash # Tags: atomic-database -CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# shellcheck source=../shell_config.sh -. "$CUR_DIR"/../shell_config.sh - -# Set session timezone to UTC to make all DateTime formatting and parsing use UTC, because refresh -# scheduling is done in UTC. -CLICKHOUSE_CLIENT="`echo "$CLICKHOUSE_CLIENT" | sed 's/--session_timezone[= ][^ ]*//g'`" -CLICKHOUSE_CLIENT="`echo "$CLICKHOUSE_CLIENT --allow_experimental_refreshable_materialized_view=1 --session_timezone Etc/UTC"`" - -$CLICKHOUSE_CLIENT -nq "create view refreshes as select * from system.view_refreshes where database = '$CLICKHOUSE_DATABASE' order by view" - - -# Basic refreshing. -$CLICKHOUSE_CLIENT -nq " - create materialized view a - refresh after 2 second - engine Memory - empty - as select number as x from numbers(2) union all select rand64() as x" -$CLICKHOUSE_CLIENT -nq "select '<1: created view>', view, remaining_dependencies, exception, last_refresh_result in ('Unknown', 'Finished') from refreshes"; -$CLICKHOUSE_CLIENT -nq "show create a" -# Wait for any refresh. (xargs trims the string and turns \t and \n into spaces) -while [ "`$CLICKHOUSE_CLIENT -nq "select last_refresh_result from refreshes -- $LINENO" | xargs`" == 'Unknown' ] -do - sleep 0.1 -done -start_time="`$CLICKHOUSE_CLIENT -nq "select reinterpret(now64(), 'Int64')"`" -# Check table contents. -$CLICKHOUSE_CLIENT -nq "select '<2: refreshed>', count(), sum(x=0), sum(x=1) from a" -# Wait for table contents to change. -res1="`$CLICKHOUSE_CLIENT -nq 'select * from a order by x format Values'`" -while : -do - res2="`$CLICKHOUSE_CLIENT -nq 'select * from a order by x format Values -- $LINENO'`" - [ "$res2" == "$res1" ] || break - sleep 0.1 -done -# Wait for another change. -while : -do - res3="`$CLICKHOUSE_CLIENT -nq 'select * from a order by x format Values -- $LINENO'`" - [ "$res3" == "$res2" ] || break - sleep 0.1 -done -# Check that the two changes were at least 1 second apart, in particular that we're not refreshing -# like crazy. This is potentially flaky, but we need at least one test that uses non-mocked timer -# to make sure the clock+timer code works at all. If it turns out flaky, increase refresh period above. 
-$CLICKHOUSE_CLIENT -nq " - select '<3: time difference at least>', min2(reinterpret(now64(), 'Int64') - $start_time, 1000); - select '<4: next refresh in>', next_refresh_time-last_refresh_time from refreshes;" - -# Create a source table from which views will read. -$CLICKHOUSE_CLIENT -nq " - create table src (x Int8) engine Memory as select 1" - -# Switch to fake clock, change refresh schedule, change query. -$CLICKHOUSE_CLIENT -nq " - system test view a set fake time '2050-01-01 00:00:01';" -while [ "`$CLICKHOUSE_CLIENT -nq "select status, last_refresh_time, next_refresh_time from refreshes -- $LINENO" | xargs`" != 'Scheduled 2050-01-01 00:00:01 2050-01-01 00:00:03' ] -do - sleep 0.1 -done -$CLICKHOUSE_CLIENT -nq " - alter table a modify refresh every 2 year; - alter table a modify query select x*2 as x from src; - select '<4.5: altered>', status, last_refresh_result, next_refresh_time from refreshes; - show create a;" -# Advance time to trigger the refresh. -$CLICKHOUSE_CLIENT -nq " - select '<5: no refresh>', count() from a; - system test view a set fake time '2052-02-03 04:05:06';" -while [ "`$CLICKHOUSE_CLIENT -nq "select last_refresh_time from refreshes -- $LINENO" | xargs`" != '2052-02-03 04:05:06' ] -do - sleep 0.1 -done -$CLICKHOUSE_CLIENT -nq " - select '<6: refreshed>', * from a; - select '<7: refreshed>', status, last_refresh_result, next_refresh_time from refreshes;" - -# Create a dependent view, refresh it once. -$CLICKHOUSE_CLIENT -nq " - create materialized view b refresh every 2 year depends on a (y Int32) engine MergeTree order by y empty as select x*10 as y from a; - show create b; - system test view b set fake time '2052-11-11 11:11:11'; - system refresh view b;" -while [ "`$CLICKHOUSE_CLIENT -nq "select last_refresh_time from refreshes where view = 'b' -- $LINENO" | xargs`" != '2052-11-11 11:11:11' ] -do - sleep 0.1 -done -# Next refresh shouldn't start until the dependency refreshes. -$CLICKHOUSE_CLIENT -nq " - select '<8: refreshed>', * from b; - select '<9: refreshed>', view, status, last_refresh_result, next_refresh_time from refreshes; - system test view b set fake time '2054-01-24 23:22:21';" -while [ "`$CLICKHOUSE_CLIENT -nq "select status, next_refresh_time from refreshes where view = 'b' -- $LINENO" | xargs`" != 'WaitingForDependencies 2054-01-01 00:00:00' ] -do - sleep 0.1 -done -# Update source table (by dropping and re-creating it - to test that tables are looked up by name -# rather than uuid), kick off refresh of the dependency. -$CLICKHOUSE_CLIENT -nq " - select '<10: waiting>', view, status, remaining_dependencies, next_refresh_time from refreshes; - drop table src; - create table src (x Int16) engine Memory as select 2; - system test view a set fake time '2054-01-01 00:00:01';" -while [ "`$CLICKHOUSE_CLIENT -nq "select status from refreshes where view = 'b' -- $LINENO" | xargs`" != 'Scheduled' ] -do - sleep 0.1 -done -# Both tables should've refreshed. -$CLICKHOUSE_CLIENT -nq " - select '<11: chain-refreshed a>', * from a; - select '<12: chain-refreshed b>', * from b; - select '<13: chain-refreshed>', view, status, remaining_dependencies, last_refresh_result, last_refresh_time, next_refresh_time, exception from refreshes;" - -# Make the dependent table run ahead by one refresh cycle, make sure it waits for the dependency to -# catch up to the same cycle. 
-$CLICKHOUSE_CLIENT -nq " - system test view b set fake time '2059-01-01 00:00:00'; - system refresh view b;" -while [ "`$CLICKHOUSE_CLIENT -nq "select next_refresh_time from refreshes where view = 'b' -- $LINENO" | xargs`" != '2060-01-01 00:00:00' ] -do - sleep 0.1 -done -$CLICKHOUSE_CLIENT -nq " - system test view b set fake time '2061-01-01 00:00:00'; - system test view a set fake time '2057-01-01 00:00:00';" -while [ "`$CLICKHOUSE_CLIENT -nq "select status, next_refresh_time from refreshes -- $LINENO" | xargs`" != 'Scheduled 2058-01-01 00:00:00 WaitingForDependencies 2060-01-01 00:00:00' ] -do - sleep 0.1 -done - -$CLICKHOUSE_CLIENT -nq " - select '<14: waiting for next cycle>', view, status, remaining_dependencies, next_refresh_time from refreshes; - truncate src; - insert into src values (3); - system test view a set fake time '2060-02-02 02:02:02';" -while [ "`$CLICKHOUSE_CLIENT -nq "select next_refresh_time from refreshes where view = 'b' -- $LINENO" | xargs`" != '2062-01-01 00:00:00' ] -do - sleep 0.1 -done -$CLICKHOUSE_CLIENT -nq " - select '<15: chain-refreshed a>', * from a; - select '<16: chain-refreshed b>', * from b; - select '<17: chain-refreshed>', view, status, next_refresh_time from refreshes;" - -# Get to WaitingForDependencies state and remove the depencency. -$CLICKHOUSE_CLIENT -nq " - system test view b set fake time '2062-03-03 03:03:03'" -while [ "`$CLICKHOUSE_CLIENT -nq "select status from refreshes where view = 'b' -- $LINENO" | xargs`" != 'WaitingForDependencies' ] -do - sleep 0.1 -done -$CLICKHOUSE_CLIENT -nq " - alter table b modify refresh every 2 year" -while [ "`$CLICKHOUSE_CLIENT -nq "select status, last_refresh_time from refreshes where view = 'b' -- $LINENO" | xargs`" != 'Scheduled 2062-03-03 03:03:03' ] -do - sleep 0.1 -done -$CLICKHOUSE_CLIENT -nq " - select '<18: removed dependency>', view, status, remaining_dependencies, last_refresh_time,next_refresh_time, refresh_count from refreshes where view = 'b'; - show create b;" - -# Select from a table that doesn't exist, get an exception. -$CLICKHOUSE_CLIENT -nq " - drop table a; - drop table b; - create materialized view c refresh every 1 second (x Int64) engine Memory empty as select * from src; - drop table src;" -while [ "`$CLICKHOUSE_CLIENT -nq "select last_refresh_result from refreshes where view = 'c' -- $LINENO" | xargs`" != 'Exception' ] -do - sleep 0.1 -done -# Check exception, create src, expect successful refresh. -$CLICKHOUSE_CLIENT -nq " - select '<19: exception>', exception ilike '%UNKNOWN_TABLE%' ? '1' : exception from refreshes where view = 'c'; - create table src (x Int64) engine Memory as select 1; - system refresh view c;" -while [ "`$CLICKHOUSE_CLIENT -nq "select last_refresh_result from refreshes -- $LINENO" | xargs`" != 'Finished' ] -do - sleep 0.1 -done -# Rename table. -$CLICKHOUSE_CLIENT -nq " - select '<20: unexception>', * from c; - rename table c to d; - select '<21: rename>', * from d; - select '<22: rename>', view, last_refresh_result from refreshes;" - -# Do various things during a refresh. -# First make a nonempty view. -$CLICKHOUSE_CLIENT -nq " - drop table d; - truncate src; - insert into src values (1) - create materialized view e refresh every 1 second (x Int64) engine MergeTree order by x empty as select x + sleepEachRow(1) as x from src settings max_block_size = 1;" -while [ "`$CLICKHOUSE_CLIENT -nq "select last_refresh_result from refreshes -- $LINENO" | xargs`" != 'Finished' ] -do - sleep 0.1 -done -# Stop refreshes. 
-$CLICKHOUSE_CLIENT -nq " - select '<23: simple refresh>', * from e; - system stop view e;" -while [ "`$CLICKHOUSE_CLIENT -nq "select status from refreshes -- $LINENO" | xargs`" != 'Disabled' ] -do - sleep 0.1 -done -# Make refreshes slow, wait for a slow refresh to start. (We stopped refreshes first to make sure -# we wait for a slow refresh, not a previous fast one.) -$CLICKHOUSE_CLIENT -nq " - insert into src select * from numbers(1000) settings max_block_size=1; - system start view e;" -while [ "`$CLICKHOUSE_CLIENT -nq "select status from refreshes -- $LINENO" | xargs`" != 'Running' ] -do - sleep 0.1 -done -# Rename. -$CLICKHOUSE_CLIENT -nq " - rename table e to f; - select '<24: rename during refresh>', * from f; - select '<25: rename during refresh>', view, status from refreshes where view = 'f'; - alter table f modify refresh after 10 year;" - -# Cancel. -$CLICKHOUSE_CLIENT -nq " - system cancel view f;" -while [ "`$CLICKHOUSE_CLIENT -nq "select last_refresh_result from refreshes where view = 'f' -- $LINENO" | xargs`" != 'Cancelled' ] -do - sleep 0.1 -done - -while [ "`$CLICKHOUSE_CLIENT -nq "select status from refreshes where view = 'f' -- $LINENO" | xargs`" = 'Running' ] -do - sleep 0.1 -done - -# Check that another refresh doesn't immediately start after the cancelled one. -$CLICKHOUSE_CLIENT -nq " - select '<27: cancelled>', view, status from refreshes where view = 'f'; - system refresh view f;" -while [ "`$CLICKHOUSE_CLIENT -nq "select status from refreshes where view = 'f' -- $LINENO" | xargs`" != 'Running' ] -do - sleep 0.1 -done -# Drop. -$CLICKHOUSE_CLIENT -nq " - drop table f; - select '<28: drop during refresh>', view, status from refreshes;" - -# Try OFFSET and RANDOMIZE FOR. -$CLICKHOUSE_CLIENT -nq " - create materialized view g refresh every 1 week offset 3 day 4 hour randomize for 4 day 1 hour (x Int64) engine Memory empty as select 42; - show create g; - system test view g set fake time '2050-02-03 15:30:13';" -while [ "`$CLICKHOUSE_CLIENT -nq "select next_refresh_time > '2049-01-01' from refreshes -- $LINENO" | xargs`" != '1' ] -do - sleep 0.1 -done -$CLICKHOUSE_CLIENT -nq " - with '2050-02-10 04:00:00'::DateTime as expected - select '<29: randomize>', abs(next_refresh_time::Int64 - expected::Int64) <= 3600*(24*4+1), next_refresh_time != expected from refreshes;" - -# Send data 'TO' an existing table. 
-$CLICKHOUSE_CLIENT -nq " - drop table g; - create table dest (x Int64) engine MergeTree order by x; - truncate src; - insert into src values (1); - create materialized view h refresh every 1 second to dest empty as select x*10 as x from src; - show create h;" -while [ "`$CLICKHOUSE_CLIENT -nq "select last_refresh_result from refreshes -- $LINENO" | xargs`" != 'Finished' ] -do - sleep 0.1 -done -$CLICKHOUSE_CLIENT -nq " - select '<30: to existing table>', * from dest; - insert into src values (2);" -while [ "`$CLICKHOUSE_CLIENT -nq "select count() from dest -- $LINENO" | xargs`" != '2' ] -do - sleep 0.1 -done -$CLICKHOUSE_CLIENT -nq " - select '<31: to existing table>', * from dest; - drop table dest; - drop table src; - drop table h;" - -# EMPTY -$CLICKHOUSE_CLIENT -nq " - create materialized view i refresh after 1 year engine Memory empty as select number as x from numbers(2); - create materialized view j refresh after 1 year engine Memory as select number as x from numbers(2)" -while [ "`$CLICKHOUSE_CLIENT -nq "select sum(last_success_time is null) from refreshes -- $LINENO" | xargs`" == '2' ] -do - sleep 0.1 -done -$CLICKHOUSE_CLIENT -nq " - select '<32: empty>', view, status, last_refresh_result from refreshes order by view; - drop table i; - drop table j" - -$CLICKHOUSE_CLIENT -nq " - drop table refreshes;" +# TODO: Re-add this test in https://github.com/ClickHouse/ClickHouse/pull/58934 From f3d5859c8bd5d1fb43f0d636d6cff3062e4ca267 Mon Sep 17 00:00:00 2001 From: avogar Date: Mon, 29 Jul 2024 19:35:37 +0000 Subject: [PATCH 426/661] Fix new test --- ...dynamic_read_subcolumns_small.reference.j2 | 192 +++++++++--------- ...03036_dynamic_read_subcolumns_small.sql.j2 | 16 +- 2 files changed, 104 insertions(+), 104 deletions(-) diff --git a/tests/queries/0_stateless/03036_dynamic_read_subcolumns_small.reference.j2 b/tests/queries/0_stateless/03036_dynamic_read_subcolumns_small.reference.j2 index be3f4e53990..d6add681f51 100644 --- a/tests/queries/0_stateless/03036_dynamic_read_subcolumns_small.reference.j2 +++ b/tests/queries/0_stateless/03036_dynamic_read_subcolumns_small.reference.j2 @@ -25,26 +25,26 @@ UInt64 7 7 \N [] 8 8 \N [] 9 9 \N [] -str_10 \N str_10 [] [[0]] \N \N [] -str_11 \N str_11 [] +str_10 \N str_10 [] [[0,1]] \N \N [] -str_12 \N str_12 [] +str_11 \N str_11 [] [[0,1,2]] \N \N [] -str_13 \N str_13 [] +str_12 \N str_12 [] [[0,1,2,3]] \N \N [] -str_14 \N str_14 [] +str_13 \N str_13 [] [[0,1,2,3,4]] \N \N [] -str_15 \N str_15 [] +str_14 \N str_14 [] [[0,1,2,3,4,5]] \N \N [] -str_16 \N str_16 [] +str_15 \N str_15 [] [[0,1,2,3,4,5,6]] \N \N [] -str_17 \N str_17 [] +str_16 \N str_16 [] [[0,1,2,3,4,5,6,7]] \N \N [] -str_18 \N str_18 [] +str_17 \N str_17 [] [[0,1,2,3,4,5,6,7,8]] \N \N [] -str_19 \N str_19 [] +str_18 \N str_18 [] [[0,1,2,3,4,5,6,7,8,9]] \N \N [] +str_19 \N str_19 [] [20] \N \N [20] ['str_21','str_21'] \N \N ['str_21','str_21'] [22,22,22] \N \N [22,22,22] @@ -115,6 +115,7 @@ str_79 \N str_79 [] 7 \N [] 8 \N [] 9 \N [] +\N \N [] \N str_10 [] \N \N [] \N str_11 [] @@ -134,7 +135,6 @@ str_79 \N str_79 [] \N str_18 [] \N \N [] \N str_19 [] -\N \N [] \N \N [20] \N \N ['str_21','str_21'] \N \N [22,22,22] @@ -295,26 +295,26 @@ str_79 \N str_79 [] 7 7 \N [] 0 [] 8 8 \N [] 0 [] 9 9 \N [] 0 [] -str_10 \N \N [] 0 [] [[0]] \N \N [] 0 [] -str_11 \N \N [] 0 [] +str_10 \N \N [] 0 [] [[0,1]] \N \N [] 0 [] -str_12 \N \N [] 0 [] +str_11 \N \N [] 0 [] [[0,1,2]] \N \N [] 0 [] -str_13 \N \N [] 0 [] +str_12 \N \N [] 0 [] [[0,1,2,3]] \N \N [] 0 [] -str_14 \N \N [] 0 [] +str_13 \N \N [] 0 [] 
[[0,1,2,3,4]] \N \N [] 0 [] -str_15 \N \N [] 0 [] +str_14 \N \N [] 0 [] [[0,1,2,3,4,5]] \N \N [] 0 [] -str_16 \N \N [] 0 [] +str_15 \N \N [] 0 [] [[0,1,2,3,4,5,6]] \N \N [] 0 [] -str_17 \N \N [] 0 [] +str_16 \N \N [] 0 [] [[0,1,2,3,4,5,6,7]] \N \N [] 0 [] -str_18 \N \N [] 0 [] +str_17 \N \N [] 0 [] [[0,1,2,3,4,5,6,7,8]] \N \N [] 0 [] -str_19 \N \N [] 0 [] +str_18 \N \N [] 0 [] [[0,1,2,3,4,5,6,7,8,9]] \N \N [] 0 [] +str_19 \N \N [] 0 [] [20] \N \N [20] 1 [20] ['str_21','str_21'] \N \N ['str_21','str_21'] 2 [NULL,NULL] [22,22,22] \N \N [22,22,22] 3 [22,22,22] @@ -475,26 +475,26 @@ str_79 \N \N [] 0 [] 7 0 [] [] 8 0 [] [] 9 0 [] [] -str_10 0 [] [] [[0]] 0 [] [] -str_11 0 [] [] +str_10 0 [] [] [[0,1]] 0 [] [] -str_12 0 [] [] +str_11 0 [] [] [[0,1,2]] 0 [] [] -str_13 0 [] [] +str_12 0 [] [] [[0,1,2,3]] 0 [] [] -str_14 0 [] [] +str_13 0 [] [] [[0,1,2,3,4]] 0 [] [] -str_15 0 [] [] +str_14 0 [] [] [[0,1,2,3,4,5]] 0 [] [] -str_16 0 [] [] +str_15 0 [] [] [[0,1,2,3,4,5,6]] 0 [] [] -str_17 0 [] [] +str_16 0 [] [] [[0,1,2,3,4,5,6,7]] 0 [] [] -str_18 0 [] [] +str_17 0 [] [] [[0,1,2,3,4,5,6,7,8]] 0 [] [] -str_19 0 [] [] +str_18 0 [] [] [[0,1,2,3,4,5,6,7,8,9]] 0 [] [] +str_19 0 [] [] [20] 0 [] [20] ['str_21','str_21'] 0 [] [NULL,NULL] [22,22,22] 0 [] [22,22,22] @@ -655,7 +655,6 @@ str_79 0 [] [] [] [] [] [] [] [] [] [] [] -[] [] [] [1] [[0]] [[[]]] [] [] [] [2] [[0,1]] [[[],[]]] @@ -735,6 +734,7 @@ str_79 0 [] [] [] [] [] [] [] [] [] [] [] +[] [] [] Array(Array(Dynamic)) Array(Variant(String, UInt64)) None @@ -762,26 +762,26 @@ UInt64 7 7 \N [] 8 8 \N [] 9 9 \N [] -str_10 \N str_10 [] [[0]] \N \N [] -str_11 \N str_11 [] +str_10 \N str_10 [] [[0,1]] \N \N [] -str_12 \N str_12 [] +str_11 \N str_11 [] [[0,1,2]] \N \N [] -str_13 \N str_13 [] +str_12 \N str_12 [] [[0,1,2,3]] \N \N [] -str_14 \N str_14 [] +str_13 \N str_13 [] [[0,1,2,3,4]] \N \N [] -str_15 \N str_15 [] +str_14 \N str_14 [] [[0,1,2,3,4,5]] \N \N [] -str_16 \N str_16 [] +str_15 \N str_15 [] [[0,1,2,3,4,5,6]] \N \N [] -str_17 \N str_17 [] +str_16 \N str_16 [] [[0,1,2,3,4,5,6,7]] \N \N [] -str_18 \N str_18 [] +str_17 \N str_17 [] [[0,1,2,3,4,5,6,7,8]] \N \N [] -str_19 \N str_19 [] +str_18 \N str_18 [] [[0,1,2,3,4,5,6,7,8,9]] \N \N [] +str_19 \N str_19 [] [20] \N \N [20] ['str_21','str_21'] \N \N ['str_21','str_21'] [22,22,22] \N \N [22,22,22] @@ -852,6 +852,7 @@ str_79 \N str_79 [] 7 \N [] 8 \N [] 9 \N [] +\N \N [] \N str_10 [] \N \N [] \N str_11 [] @@ -871,7 +872,6 @@ str_79 \N str_79 [] \N str_18 [] \N \N [] \N str_19 [] -\N \N [] \N \N [20] \N \N ['str_21','str_21'] \N \N [22,22,22] @@ -1032,26 +1032,26 @@ str_79 \N str_79 [] 7 7 \N [] 0 [] 8 8 \N [] 0 [] 9 9 \N [] 0 [] -str_10 \N \N [] 0 [] [[0]] \N \N [] 0 [] -str_11 \N \N [] 0 [] +str_10 \N \N [] 0 [] [[0,1]] \N \N [] 0 [] -str_12 \N \N [] 0 [] +str_11 \N \N [] 0 [] [[0,1,2]] \N \N [] 0 [] -str_13 \N \N [] 0 [] +str_12 \N \N [] 0 [] [[0,1,2,3]] \N \N [] 0 [] -str_14 \N \N [] 0 [] +str_13 \N \N [] 0 [] [[0,1,2,3,4]] \N \N [] 0 [] -str_15 \N \N [] 0 [] +str_14 \N \N [] 0 [] [[0,1,2,3,4,5]] \N \N [] 0 [] -str_16 \N \N [] 0 [] +str_15 \N \N [] 0 [] [[0,1,2,3,4,5,6]] \N \N [] 0 [] -str_17 \N \N [] 0 [] +str_16 \N \N [] 0 [] [[0,1,2,3,4,5,6,7]] \N \N [] 0 [] -str_18 \N \N [] 0 [] +str_17 \N \N [] 0 [] [[0,1,2,3,4,5,6,7,8]] \N \N [] 0 [] -str_19 \N \N [] 0 [] +str_18 \N \N [] 0 [] [[0,1,2,3,4,5,6,7,8,9]] \N \N [] 0 [] +str_19 \N \N [] 0 [] [20] \N \N [20] 1 [20] ['str_21','str_21'] \N \N ['str_21','str_21'] 2 [NULL,NULL] [22,22,22] \N \N [22,22,22] 3 [22,22,22] @@ -1212,26 +1212,26 @@ str_79 \N \N 
[] 0 [] 7 0 [] [] 8 0 [] [] 9 0 [] [] -str_10 0 [] [] [[0]] 0 [] [] -str_11 0 [] [] +str_10 0 [] [] [[0,1]] 0 [] [] -str_12 0 [] [] +str_11 0 [] [] [[0,1,2]] 0 [] [] -str_13 0 [] [] +str_12 0 [] [] [[0,1,2,3]] 0 [] [] -str_14 0 [] [] +str_13 0 [] [] [[0,1,2,3,4]] 0 [] [] -str_15 0 [] [] +str_14 0 [] [] [[0,1,2,3,4,5]] 0 [] [] -str_16 0 [] [] +str_15 0 [] [] [[0,1,2,3,4,5,6]] 0 [] [] -str_17 0 [] [] +str_16 0 [] [] [[0,1,2,3,4,5,6,7]] 0 [] [] -str_18 0 [] [] +str_17 0 [] [] [[0,1,2,3,4,5,6,7,8]] 0 [] [] -str_19 0 [] [] +str_18 0 [] [] [[0,1,2,3,4,5,6,7,8,9]] 0 [] [] +str_19 0 [] [] [20] 0 [] [20] ['str_21','str_21'] 0 [] [NULL,NULL] [22,22,22] 0 [] [22,22,22] @@ -1392,7 +1392,6 @@ str_79 0 [] [] [] [] [] [] [] [] [] [] [] -[] [] [] [1] [[0]] [[[]]] [] [] [] [2] [[0,1]] [[[],[]]] @@ -1472,6 +1471,7 @@ str_79 0 [] [] [] [] [] [] [] [] [] [] [] +[] [] [] Array(Array(Dynamic)) Array(Variant(String, UInt64)) None @@ -1499,26 +1499,26 @@ UInt64 7 7 \N [] 8 8 \N [] 9 9 \N [] -str_10 \N str_10 [] [[0]] \N \N [] -str_11 \N str_11 [] +str_10 \N str_10 [] [[0,1]] \N \N [] -str_12 \N str_12 [] +str_11 \N str_11 [] [[0,1,2]] \N \N [] -str_13 \N str_13 [] +str_12 \N str_12 [] [[0,1,2,3]] \N \N [] -str_14 \N str_14 [] +str_13 \N str_13 [] [[0,1,2,3,4]] \N \N [] -str_15 \N str_15 [] +str_14 \N str_14 [] [[0,1,2,3,4,5]] \N \N [] -str_16 \N str_16 [] +str_15 \N str_15 [] [[0,1,2,3,4,5,6]] \N \N [] -str_17 \N str_17 [] +str_16 \N str_16 [] [[0,1,2,3,4,5,6,7]] \N \N [] -str_18 \N str_18 [] +str_17 \N str_17 [] [[0,1,2,3,4,5,6,7,8]] \N \N [] -str_19 \N str_19 [] +str_18 \N str_18 [] [[0,1,2,3,4,5,6,7,8,9]] \N \N [] +str_19 \N str_19 [] [20] \N \N [20] ['str_21','str_21'] \N \N ['str_21','str_21'] [22,22,22] \N \N [22,22,22] @@ -1589,6 +1589,7 @@ str_79 \N str_79 [] 7 \N [] 8 \N [] 9 \N [] +\N \N [] \N str_10 [] \N \N [] \N str_11 [] @@ -1608,7 +1609,6 @@ str_79 \N str_79 [] \N str_18 [] \N \N [] \N str_19 [] -\N \N [] \N \N [20] \N \N ['str_21','str_21'] \N \N [22,22,22] @@ -1769,26 +1769,26 @@ str_79 \N str_79 [] 7 7 \N [] 0 [] 8 8 \N [] 0 [] 9 9 \N [] 0 [] -str_10 \N \N [] 0 [] [[0]] \N \N [] 0 [] -str_11 \N \N [] 0 [] +str_10 \N \N [] 0 [] [[0,1]] \N \N [] 0 [] -str_12 \N \N [] 0 [] +str_11 \N \N [] 0 [] [[0,1,2]] \N \N [] 0 [] -str_13 \N \N [] 0 [] +str_12 \N \N [] 0 [] [[0,1,2,3]] \N \N [] 0 [] -str_14 \N \N [] 0 [] +str_13 \N \N [] 0 [] [[0,1,2,3,4]] \N \N [] 0 [] -str_15 \N \N [] 0 [] +str_14 \N \N [] 0 [] [[0,1,2,3,4,5]] \N \N [] 0 [] -str_16 \N \N [] 0 [] +str_15 \N \N [] 0 [] [[0,1,2,3,4,5,6]] \N \N [] 0 [] -str_17 \N \N [] 0 [] +str_16 \N \N [] 0 [] [[0,1,2,3,4,5,6,7]] \N \N [] 0 [] -str_18 \N \N [] 0 [] +str_17 \N \N [] 0 [] [[0,1,2,3,4,5,6,7,8]] \N \N [] 0 [] -str_19 \N \N [] 0 [] +str_18 \N \N [] 0 [] [[0,1,2,3,4,5,6,7,8,9]] \N \N [] 0 [] +str_19 \N \N [] 0 [] [20] \N \N [20] 1 [20] ['str_21','str_21'] \N \N ['str_21','str_21'] 2 [NULL,NULL] [22,22,22] \N \N [22,22,22] 3 [22,22,22] @@ -1949,26 +1949,26 @@ str_79 \N \N [] 0 [] 7 0 [] [] 8 0 [] [] 9 0 [] [] -str_10 0 [] [] [[0]] 0 [] [] -str_11 0 [] [] +str_10 0 [] [] [[0,1]] 0 [] [] -str_12 0 [] [] +str_11 0 [] [] [[0,1,2]] 0 [] [] -str_13 0 [] [] +str_12 0 [] [] [[0,1,2,3]] 0 [] [] -str_14 0 [] [] +str_13 0 [] [] [[0,1,2,3,4]] 0 [] [] -str_15 0 [] [] +str_14 0 [] [] [[0,1,2,3,4,5]] 0 [] [] -str_16 0 [] [] +str_15 0 [] [] [[0,1,2,3,4,5,6]] 0 [] [] -str_17 0 [] [] +str_16 0 [] [] [[0,1,2,3,4,5,6,7]] 0 [] [] -str_18 0 [] [] +str_17 0 [] [] [[0,1,2,3,4,5,6,7,8]] 0 [] [] -str_19 0 [] [] +str_18 0 [] [] [[0,1,2,3,4,5,6,7,8,9]] 0 [] [] +str_19 0 [] [] 
[20] 0 [] [20] ['str_21','str_21'] 0 [] [NULL,NULL] [22,22,22] 0 [] [22,22,22] @@ -2129,7 +2129,6 @@ str_79 0 [] [] [] [] [] [] [] [] [] [] [] -[] [] [] [1] [[0]] [[[]]] [] [] [] [2] [[0,1]] [[[],[]]] @@ -2209,3 +2208,4 @@ str_79 0 [] [] [] [] [] [] [] [] [] [] [] +[] [] [] diff --git a/tests/queries/0_stateless/03036_dynamic_read_subcolumns_small.sql.j2 b/tests/queries/0_stateless/03036_dynamic_read_subcolumns_small.sql.j2 index 1ed836fbeee..3253d7a6c68 100644 --- a/tests/queries/0_stateless/03036_dynamic_read_subcolumns_small.sql.j2 +++ b/tests/queries/0_stateless/03036_dynamic_read_subcolumns_small.sql.j2 @@ -29,14 +29,14 @@ select count() from test where not empty(d.`Array(Array(Dynamic))`); select count() from test where d is NULL; select count() from test where not empty(d.`Tuple(a Array(Dynamic))`.a.String); -select d, d.UInt64, d.String, d.`Array(Variant(String, UInt64))` from test order by id; -select d.UInt64, d.String, d.`Array(Variant(String, UInt64))` from test order by id; -select d.Int8, d.Date, d.`Array(String)` from test order by id; -select d, d.UInt64, d.Date, d.`Array(Variant(String, UInt64))`, d.`Array(Variant(String, UInt64))`.size0, d.`Array(Variant(String, UInt64))`.UInt64 from test order by id; -select d.UInt64, d.Date, d.`Array(Variant(String, UInt64))`, d.`Array(Variant(String, UInt64))`.size0, d.`Array(Variant(String, UInt64))`.UInt64, d.`Array(Variant(String, UInt64))`.String from test order by id; -select d, d.`Tuple(a UInt64, b String)`.a, d.`Array(Dynamic)`.`Variant(String, UInt64)`.UInt64, d.`Array(Variant(String, UInt64))`.UInt64 from test order by id; -select d.`Array(Dynamic)`.`Variant(String, UInt64)`.UInt64, d.`Array(Dynamic)`.size0, d.`Array(Variant(String, UInt64))`.UInt64 from test order by id; -select d.`Array(Array(Dynamic))`.size1, d.`Array(Array(Dynamic))`.UInt64, d.`Array(Array(Dynamic))`.`Map(String, Tuple(a UInt64))`.values.a from test order by id; +select d, d.UInt64, d.String, d.`Array(Variant(String, UInt64))` from test order by id, d; +select d.UInt64, d.String, d.`Array(Variant(String, UInt64))` from test order by id, d; +select d.Int8, d.Date, d.`Array(String)` from test order by id, d; +select d, d.UInt64, d.Date, d.`Array(Variant(String, UInt64))`, d.`Array(Variant(String, UInt64))`.size0, d.`Array(Variant(String, UInt64))`.UInt64 from test order by id, d; +select d.UInt64, d.Date, d.`Array(Variant(String, UInt64))`, d.`Array(Variant(String, UInt64))`.size0, d.`Array(Variant(String, UInt64))`.UInt64, d.`Array(Variant(String, UInt64))`.String from test order by id, d; +select d, d.`Tuple(a UInt64, b String)`.a, d.`Array(Dynamic)`.`Variant(String, UInt64)`.UInt64, d.`Array(Variant(String, UInt64))`.UInt64 from test order by id, d; +select d.`Array(Dynamic)`.`Variant(String, UInt64)`.UInt64, d.`Array(Dynamic)`.size0, d.`Array(Variant(String, UInt64))`.UInt64 from test order by id, d; +select d.`Array(Array(Dynamic))`.size1, d.`Array(Array(Dynamic))`.UInt64, d.`Array(Array(Dynamic))`.`Map(String, Tuple(a UInt64))`.values.a from test order by id, d; drop table test; From 8dfe4a93f6c1afde8475984a899cd5604f415d78 Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Mon, 29 Jul 2024 22:07:10 +0200 Subject: [PATCH 427/661] Rewrite function get_broken_projections_info() without using system.errors --- .../test_broken_projections/test.py | 86 +++++++++---------- 1 file changed, 42 insertions(+), 44 deletions(-) diff --git a/tests/integration/test_broken_projections/test.py b/tests/integration/test_broken_projections/test.py index 
2cbbee5563f..9493937d936 100644 --- a/tests/integration/test_broken_projections/test.py +++ b/tests/integration/test_broken_projections/test.py @@ -148,23 +148,22 @@ def break_part(node, table, part): bash(node, f"rm '{part_path}/columns.txt'") -def get_broken_projections_info(node, table, active=True): +def get_broken_projections_info(node, table, part=None, projection=None, active=True): + parent_name_filter = f" AND parent_name = '{part}'" if part else "" + name_filter = f" AND name = '{projection}'" if projection else "" return node.query( f""" - SELECT parent_name, name, errors.name FROM - ( - SELECT parent_name, name, exception_code + SELECT parent_name, name, exception FROM system.projection_parts WHERE table='{table}' AND database=currentDatabase() AND is_broken = 1 AND active = {active} - ) AS parts_info - INNER JOIN system.errors AS errors - ON parts_info.exception_code = errors.code + {parent_name_filter} + {name_filter} ORDER BY parent_name, name """ - ).strip() + ) def get_projections_info(node, table): @@ -312,8 +311,8 @@ def test_broken_ignored(cluster): # Projection 'proj1' from part all_2_2_0 will now appear in broken parts info # because it was marked broken during "check table" query. - assert "all_2_2_0\tproj1\tFILE_DOESNT_EXIST" in get_broken_projections_info( - node, table_name + assert "FILE_DOESNT_EXIST" in get_broken_projections_info( + node, table_name, part="all_2_2_0", projection="proj1" ) # Check table query will also show a list of parts which have broken projections. @@ -323,14 +322,14 @@ def test_broken_ignored(cluster): break_projection(node, table_name, "proj2", "all_2_2_0", "data") # It will not yet appear in broken projections info. - assert "proj2" not in get_broken_projections_info(node, table_name) + assert not get_broken_projections_info(node, table_name, projection="proj2") # Select now fails with error "File doesn't exist" check(node, table_name, 0, "proj2", "FILE_DOESNT_EXIST") # Projection 'proj2' from part all_2_2_0 will now appear in broken parts info. - assert "all_2_2_0\tproj2\tNO_FILE_IN_DATA_PART" in get_broken_projections_info( - node, table_name + assert "NO_FILE_IN_DATA_PART" in get_broken_projections_info( + node, table_name, part="all_2_2_0", projection="proj2" ) # Second select works, because projection is now marked as broken. @@ -340,7 +339,7 @@ def test_broken_ignored(cluster): break_projection(node, table_name, "proj2", "all_3_3_0", "data") # It will not yet appear in broken projections info. 
- assert "all_3_3_0" not in get_broken_projections_info(node, table_name) + assert not get_broken_projections_info(node, table_name, part="all_3_3_0") insert(node, table_name, 20, 5) insert(node, table_name, 25, 5) @@ -371,8 +370,8 @@ def test_broken_ignored(cluster): node, table_name ) - assert "all_3_3_0" in get_broken_projections_info(node, table_name, active=False) - assert "all_2_2_0" in get_broken_projections_info(node, table_name, active=True) + assert get_broken_projections_info(node, table_name, part="all_3_3_0", active=False) + assert get_broken_projections_info(node, table_name, part="all_2_2_0", active=True) # 0 because of all_2_2_0 check(node, table_name, 0) @@ -396,8 +395,8 @@ def test_materialize_broken_projection(cluster): break_projection(node, table_name, "proj1", "all_1_1_0", "metadata") reattach(node, table_name) - assert "all_1_1_0\tproj1\tNO_FILE_IN_DATA_PART" in get_broken_projections_info( - node, table_name + assert "NO_FILE_IN_DATA_PART" in get_broken_projections_info( + node, table_name, part="all_1_1_0", projection="proj1" ) assert "Part all_1_1_0 has a broken projection proj1" in check_table_full( node, table_name @@ -406,8 +405,8 @@ def test_materialize_broken_projection(cluster): break_projection(node, table_name, "proj2", "all_1_1_0", "data") reattach(node, table_name) - assert "all_1_1_0\tproj2\tFILE_DOESNT_EXIST" in get_broken_projections_info( - node, table_name + assert "FILE_DOESNT_EXIST" in get_broken_projections_info( + node, table_name, part="all_1_1_0", projection="proj2" ) assert "Part all_1_1_0 has a broken projection proj2" in check_table_full( node, table_name @@ -469,8 +468,8 @@ def test_broken_projections_in_backups_2(cluster): break_projection(node, table_name, "proj2", "all_2_2_0", "part") check(node, table_name, 0, "proj2", "ErrnoException") - assert "all_2_2_0\tproj2\tFILE_DOESNT_EXIST" == get_broken_projections_info( - node, table_name + assert "FILE_DOESNT_EXIST" in get_broken_projections_info( + node, table_name, part="all_2_2_0", projection="proj2" ) assert "FILE_DOESNT_EXIST" in node.query_and_get_error( @@ -524,8 +523,8 @@ def test_broken_projections_in_backups_3(cluster): assert "Part all_1_1_0 has a broken projection proj1" in check_table_full( node, table_name ) - assert "all_1_1_0\tproj1\tFILE_DOESNT_EXIST" == get_broken_projections_info( - node, table_name + assert "FILE_DOESNT_EXIST" in get_broken_projections_info( + node, table_name, part="all_1_1_0", projection="proj1" ) backup_name = f"b4-{get_random_string()}" @@ -545,8 +544,11 @@ def test_broken_projections_in_backups_3(cluster): ) check(node, table_name, 0) - assert "all_1_1_0\tproj1\tNO_FILE_IN_DATA_PART" == get_broken_projections_info( - node, table_name + assert ( + "Projection directory proj1.proj does not exist while loading projections" + in get_broken_projections_info( + node, table_name, part="all_1_1_0", projection="proj1" + ) ) @@ -569,7 +571,7 @@ def test_check_part_thread(cluster): break_projection(node, table_name, "proj2", "all_2_2_0", "data") # It will not yet appear in broken projections info. - assert "proj2" not in get_broken_projections_info(node, table_name) + assert not get_broken_projections_info(node, table_name, projection="proj2") # Select now fails with error "File doesn't exist" check(node, table_name, 0, "proj2", "FILE_DOESNT_EXIST", do_check_command=False) @@ -606,15 +608,15 @@ def test_broken_on_start(cluster): break_projection(node, table_name, "proj2", "all_2_2_0", "data") # It will not yet appear in broken projections info. 
- assert "proj2" not in get_broken_projections_info(node, table_name) + assert not get_broken_projections_info(node, table_name, projection="proj2") # Select now fails with error "File doesn't exist" # We will mark projection as broken. check(node, table_name, 0, "proj2", "FILE_DOESNT_EXIST") # Projection 'proj2' from part all_2_2_0 will now appear in broken parts info. - assert "all_2_2_0\tproj2\tNO_FILE_IN_DATA_PART" in get_broken_projections_info( - node, table_name + assert "NO_FILE_IN_DATA_PART" in get_broken_projections_info( + node, table_name, part="all_2_2_0", projection="proj2" ) # Second select works, because projection is now marked as broken. @@ -623,7 +625,7 @@ def test_broken_on_start(cluster): node.restart_clickhouse() # It will not yet appear in broken projections info. - assert "proj2" in get_broken_projections_info(node, table_name) + assert get_broken_projections_info(node, table_name, projection="proj2") # Select works check(node, table_name, 0) @@ -654,7 +656,7 @@ def test_mutation_with_broken_projection(cluster): node, table_name ) - assert "" == get_broken_projections_info(node, table_name) + assert not get_broken_projections_info(node, table_name) check(node, table_name, 1) @@ -662,21 +664,21 @@ def test_mutation_with_broken_projection(cluster): break_projection(node, table_name, "proj2", "all_2_2_0_4", "data") # It will not yet appear in broken projections info. - assert "proj2" not in get_broken_projections_info(node, table_name) + assert not get_broken_projections_info(node, table_name, projection="proj2") # Select now fails with error "File doesn't exist" # We will mark projection as broken. check(node, table_name, 0, "proj2", "FILE_DOESNT_EXIST") # Projection 'proj2' from part all_2_2_0_4 will now appear in broken parts info. - assert "all_2_2_0_4\tproj2\tNO_FILE_IN_DATA_PART" in get_broken_projections_info( - node, table_name + assert "NO_FILE_IN_DATA_PART" in get_broken_projections_info( + node, table_name, part="all_2_2_0_4", projection="proj2" ) # Second select works, because projection is now marked as broken. check(node, table_name, 0) - assert "all_2_2_0_4" in get_broken_projections_info(node, table_name) + assert get_broken_projections_info(node, table_name, part="all_2_2_0_4") node.query( f"ALTER TABLE {table_name} DELETE WHERE _part == 'all_0_0_0_4' SETTINGS mutations_sync = 1" @@ -690,14 +692,10 @@ def test_mutation_with_broken_projection(cluster): # Still broken because it was hardlinked. broken = get_broken_projections_info(node, table_name) - assert ( - "all_2_2_0_5" in broken or "" == broken - ) # second could be because of a merge. + if broken: # can be not broken because of a merge. + assert get_broken_projections_info(node, table_name, part="all_2_2_0_5") - if "" == broken: - check(node, table_name, 1) - else: - check(node, table_name, 0) + check(node, table_name, not broken) node.query( f"ALTER TABLE {table_name} DELETE WHERE c == 13 SETTINGS mutations_sync = 1" @@ -710,6 +708,6 @@ def test_mutation_with_broken_projection(cluster): ) # Not broken anymore. 
- assert "" == get_broken_projections_info(node, table_name) + assert not get_broken_projections_info(node, table_name) check(node, table_name, 1) From f94bebb0530b7a9fdd3db104ad4261a467fafad3 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Mon, 29 Jul 2024 22:36:59 +0000 Subject: [PATCH 428/661] fix --- docs/en/sql-reference/window-functions/lagInFrame.md | 2 +- docs/en/sql-reference/window-functions/leadInFrame.md | 2 +- src/Processors/Transforms/WindowTransform.cpp | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/en/sql-reference/window-functions/lagInFrame.md b/docs/en/sql-reference/window-functions/lagInFrame.md index 049e095c10f..de6e9005baa 100644 --- a/docs/en/sql-reference/window-functions/lagInFrame.md +++ b/docs/en/sql-reference/window-functions/lagInFrame.md @@ -23,7 +23,7 @@ For more detail on window function syntax see: [Window Functions - Syntax](./ind **Parameters** - `x` — Column name. - `offset` — Offset to apply. [(U)Int*](../data-types/int-uint.md). (Optional - `1` by default). -- `default` — Value to return if calculated row exceeds the boundaries of the window frame. (Optional - `null` by default). +- `default` — Value to return if calculated row exceeds the boundaries of the window frame. (Optional - default value of column type when omitted). **Returned value** diff --git a/docs/en/sql-reference/window-functions/leadInFrame.md b/docs/en/sql-reference/window-functions/leadInFrame.md index fc1b92cc266..4a82c03f6e6 100644 --- a/docs/en/sql-reference/window-functions/leadInFrame.md +++ b/docs/en/sql-reference/window-functions/leadInFrame.md @@ -23,7 +23,7 @@ For more detail on window function syntax see: [Window Functions - Syntax](./ind **Parameters** - `x` — Column name. - `offset` — Offset to apply. [(U)Int*](../data-types/int-uint.md). (Optional - `1` by default). -- `default` — Value to return if calculated row exceeds the boundaries of the window frame. (Optional - `null` by default). +- `default` — Value to return if calculated row exceeds the boundaries of the window frame. (Optional - default value of column type when omitted). 
**Returned value** diff --git a/src/Processors/Transforms/WindowTransform.cpp b/src/Processors/Transforms/WindowTransform.cpp index 1eac08780e9..f76e2d64368 100644 --- a/src/Processors/Transforms/WindowTransform.cpp +++ b/src/Processors/Transforms/WindowTransform.cpp @@ -2408,7 +2408,7 @@ struct WindowFunctionLagLeadInFrame final : public WindowFunction if (argument_types[0]->equals(*argument_types[2])) return; - const auto supertype = getLeastSupertype(DataTypes{argument_types[0], argument_types[2]}); + const auto supertype = tryGetLeastSupertype(DataTypes{argument_types[0], argument_types[2]}); if (!supertype) { throw Exception(ErrorCodes::BAD_ARGUMENTS, From 2aafd711463d30cb7803a054f434268335817db8 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Fri, 26 Jul 2024 19:08:07 +0200 Subject: [PATCH 429/661] Moved settings to ServerSettings and made the table drop even faster (cherry picked from commit e1eb542dcc2b9fbc6a470a3cd9a183e79c86d7c7) --- programs/local/LocalServer.cpp | 5 +++ programs/server/Server.cpp | 5 +++ src/Core/ServerSettings.cpp | 2 +- src/Core/ServerSettings.h | 9 ++++ src/IO/SharedThreadPools.cpp | 10 +++++ src/IO/SharedThreadPools.h | 3 ++ src/Interpreters/DatabaseCatalog.cpp | 50 ++++++++++------------- src/Interpreters/DatabaseCatalog.h | 15 ------- src/Interpreters/InterpreterDropQuery.cpp | 19 +++++++-- 9 files changed, 70 insertions(+), 48 deletions(-) diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index 88d5a0253d1..250c5e3b6c8 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -184,6 +184,11 @@ void LocalServer::initialize(Poco::Util::Application & self) cleanup_threads, 0, // We don't need any threads one all the parts will be deleted cleanup_threads); + + getDatabaseCatalogDropTablesThreadPool().initialize( + server_settings.database_catalog_drop_table_concurrency, + 0, // We don't need any threads if there are no DROP queries. + server_settings.database_catalog_drop_table_concurrency); } diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 16888015f8b..dd56114de0f 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -1035,6 +1035,11 @@ try 0, // We don't need any threads once all the tables will be created max_database_replicated_create_table_thread_pool_size); + getDatabaseCatalogDropTablesThreadPool().initialize( + server_settings.database_catalog_drop_table_concurrency, + 0, // We don't need any threads if there are no DROP queries. + server_settings.database_catalog_drop_table_concurrency); + /// Initialize global local cache for remote filesystem. if (config().has("local_cache_for_remote_fs")) { diff --git a/src/Core/ServerSettings.cpp b/src/Core/ServerSettings.cpp index fbf86d3e9ad..6c498014996 100644 --- a/src/Core/ServerSettings.cpp +++ b/src/Core/ServerSettings.cpp @@ -1,4 +1,4 @@ -#include "ServerSettings.h" +#include #include namespace DB diff --git a/src/Core/ServerSettings.h b/src/Core/ServerSettings.h index 28b32a6e6a5..f2f78f70e91 100644 --- a/src/Core/ServerSettings.h +++ b/src/Core/ServerSettings.h @@ -66,6 +66,15 @@ namespace DB M(Bool, async_insert_queue_flush_on_shutdown, true, "If true queue of asynchronous inserts is flushed on graceful shutdown", 0) \ M(Bool, ignore_empty_sql_security_in_create_view_query, true, "If true, ClickHouse doesn't write defaults for empty SQL security statement in CREATE VIEW queries. 
This setting is only necessary for the migration period and will become obsolete in 24.4", 0) \ \ + /* Database Catalog */ \ + M(UInt64, database_atomic_delay_before_drop_table_sec, 8 * 60, "The delay during which a dropped table can be restored using the UNDROP statement. If DROP TABLE ran with a SYNC modifier, the setting is ignored.", 0) \ + M(UInt64, database_catalog_unused_dir_hide_timeout_sec, 60 * 60, "Parameter of a task that cleans up garbage from store/ directory. If some subdirectory is not used by clickhouse-server and this directory was not modified for last database_catalog_unused_dir_hide_timeout_sec seconds, the task will 'hide' this directory by removing all access rights. It also works for directories that clickhouse-server does not expect to see inside store/. Zero means 'immediately'.", 0) \ + M(UInt64, database_catalog_unused_dir_rm_timeout_sec, 30 * 24 * 60 * 60, "Parameter of a task that cleans up garbage from store/ directory. If some subdirectory is not used by clickhouse-server and it was previously 'hidden' (see database_catalog_unused_dir_hide_timeout_sec) and this directory was not modified for last database_catalog_unused_dir_rm_timeout_sec seconds, the task will remove this directory. It also works for directories that clickhouse-server does not expect to see inside store/. Zero means 'never'.", 0) \ + M(UInt64, database_catalog_unused_dir_cleanup_period_sec, 24 * 60 * 60, "Parameter of a task that cleans up garbage from store/ directory. Sets scheduling period of the task. Zero means 'never'.", 0) \ + M(UInt64, database_catalog_drop_error_cooldown_sec, 5, "In case if drop table failed, ClickHouse will wait for this timeout before retrying the operation.", 0) \ + M(UInt64, database_catalog_drop_table_concurrency, 16, "The size of the threadpool used for dropping tables.", 0) \ + \ + \ M(UInt64, max_concurrent_queries, 0, "Maximum number of concurrently executed queries. Zero means unlimited.", 0) \ M(UInt64, max_concurrent_insert_queries, 0, "Maximum number of concurrently INSERT queries. Zero means unlimited.", 0) \ M(UInt64, max_concurrent_select_queries, 0, "Maximum number of concurrently SELECT queries. Zero means unlimited.", 0) \ diff --git a/src/IO/SharedThreadPools.cpp b/src/IO/SharedThreadPools.cpp index 3606ddd984c..cda7bc01bbf 100644 --- a/src/IO/SharedThreadPools.cpp +++ b/src/IO/SharedThreadPools.cpp @@ -23,6 +23,9 @@ namespace CurrentMetrics extern const Metric MergeTreeUnexpectedPartsLoaderThreads; extern const Metric MergeTreeUnexpectedPartsLoaderThreadsActive; extern const Metric MergeTreeUnexpectedPartsLoaderThreadsScheduled; + extern const Metric DatabaseCatalogThreads; + extern const Metric DatabaseCatalogThreadsActive; + extern const Metric DatabaseCatalogThreadsScheduled; extern const Metric DatabaseReplicatedCreateTablesThreads; extern const Metric DatabaseReplicatedCreateTablesThreadsActive; extern const Metric DatabaseReplicatedCreateTablesThreadsScheduled; @@ -166,4 +169,11 @@ StaticThreadPool & getDatabaseReplicatedCreateTablesThreadPool() return instance; } +/// ThreadPool used for dropping tables. 
+StaticThreadPool & getDatabaseCatalogDropTablesThreadPool() +{ + static StaticThreadPool instance("DropTablesThreadPool", CurrentMetrics::DatabaseCatalogThreads, CurrentMetrics::DatabaseCatalogThreadsActive, CurrentMetrics::DatabaseCatalogThreadsScheduled); + return instance; +} + } diff --git a/src/IO/SharedThreadPools.h b/src/IO/SharedThreadPools.h index 50adc70c9a0..06ccebd20b2 100644 --- a/src/IO/SharedThreadPools.h +++ b/src/IO/SharedThreadPools.h @@ -69,4 +69,7 @@ StaticThreadPool & getUnexpectedPartsLoadingThreadPool(); /// ThreadPool used for creating tables in DatabaseReplicated. StaticThreadPool & getDatabaseReplicatedCreateTablesThreadPool(); +/// ThreadPool used for dropping tables. +StaticThreadPool & getDatabaseCatalogDropTablesThreadPool(); + } diff --git a/src/Interpreters/DatabaseCatalog.cpp b/src/Interpreters/DatabaseCatalog.cpp index bb2dd158710..f64f8a06f38 100644 --- a/src/Interpreters/DatabaseCatalog.cpp +++ b/src/Interpreters/DatabaseCatalog.cpp @@ -19,6 +19,8 @@ #include #include #include +#include +#include #include #include #include @@ -189,13 +191,6 @@ StoragePtr TemporaryTableHolder::getTable() const void DatabaseCatalog::initializeAndLoadTemporaryDatabase() { - drop_delay_sec = getContext()->getConfigRef().getInt("database_atomic_delay_before_drop_table_sec", default_drop_delay_sec); - unused_dir_hide_timeout_sec = getContext()->getConfigRef().getInt64("database_catalog_unused_dir_hide_timeout_sec", unused_dir_hide_timeout_sec); - unused_dir_rm_timeout_sec = getContext()->getConfigRef().getInt64("database_catalog_unused_dir_rm_timeout_sec", unused_dir_rm_timeout_sec); - unused_dir_cleanup_period_sec = getContext()->getConfigRef().getInt64("database_catalog_unused_dir_cleanup_period_sec", unused_dir_cleanup_period_sec); - drop_error_cooldown_sec = getContext()->getConfigRef().getInt64("database_catalog_drop_error_cooldown_sec", drop_error_cooldown_sec); - drop_table_concurrency = getContext()->getConfigRef().getInt64("database_catalog_drop_table_concurrency", drop_table_concurrency); - auto db_for_temporary_and_external_tables = std::make_shared(TEMPORARY_DATABASE, getContext()); attachDatabase(TEMPORARY_DATABASE, db_for_temporary_and_external_tables); } @@ -203,7 +198,7 @@ void DatabaseCatalog::initializeAndLoadTemporaryDatabase() void DatabaseCatalog::createBackgroundTasks() { /// It has to be done before databases are loaded (to avoid a race condition on initialization) - if (Context::getGlobalContextInstance()->getApplicationType() == Context::ApplicationType::SERVER && unused_dir_cleanup_period_sec) + if (Context::getGlobalContextInstance()->getApplicationType() == Context::ApplicationType::SERVER && getContext()->getServerSettings().database_catalog_unused_dir_cleanup_period_sec) { auto cleanup_task_holder = getContext()->getSchedulePool().createTask("DatabaseCatalogCleanupStoreDirectoryTask", [this]() { this->cleanupStoreDirectoryTask(); }); @@ -224,7 +219,7 @@ void DatabaseCatalog::startupBackgroundTasks() { (*cleanup_task)->activate(); /// Do not start task immediately on server startup, it's not urgent. - (*cleanup_task)->scheduleAfter(unused_dir_hide_timeout_sec * 1000); + (*cleanup_task)->scheduleAfter(static_cast(getContext()->getServerSettings().database_catalog_unused_dir_hide_timeout_sec) * 1000); } (*drop_task)->activate(); @@ -1038,15 +1033,12 @@ void DatabaseCatalog::loadMarkedAsDroppedTables() LOG_INFO(log, "Found {} partially dropped tables. 
Will load them and retry removal.", dropped_metadata.size()); - ThreadPool pool(CurrentMetrics::DatabaseCatalogThreads, CurrentMetrics::DatabaseCatalogThreadsActive, CurrentMetrics::DatabaseCatalogThreadsScheduled); + ThreadPoolCallbackRunnerLocal runner(getDatabaseCatalogDropTablesThreadPool().get(), "DropTables"); for (const auto & elem : dropped_metadata) { - pool.scheduleOrThrowOnError([&]() - { - this->enqueueDroppedTableCleanup(elem.second, nullptr, elem.first); - }); + runner([this, &elem](){ this->enqueueDroppedTableCleanup(elem.second, nullptr, elem.first); }); } - pool.wait(); + runner.waitForAllToFinishAndRethrowFirstError(); } String DatabaseCatalog::getPathForDroppedMetadata(const StorageID & table_id) const @@ -1135,7 +1127,13 @@ void DatabaseCatalog::enqueueDroppedTableCleanup(StorageID table_id, StoragePtr } else { - tables_marked_dropped.push_back({table_id, table, dropped_metadata_path, drop_time + drop_delay_sec}); + tables_marked_dropped.push_back + ({ + table_id, + table, + dropped_metadata_path, + drop_time + static_cast(getContext()->getServerSettings().database_atomic_delay_before_drop_table_sec) + }); if (first_async_drop_in_queue == tables_marked_dropped.end()) --first_async_drop_in_queue; } @@ -1289,13 +1287,7 @@ void DatabaseCatalog::dropTablesParallel(std::vector runner(getDatabaseCatalogDropTablesThreadPool().get(), "DropTables"); for (const auto & item : tables_to_drop) { @@ -1332,7 +1324,7 @@ void DatabaseCatalog::dropTablesParallel(std::vectordrop_time = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now()) + drop_error_cooldown_sec; + table_iterator->drop_time = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now()) + getContext()->getServerSettings().database_catalog_drop_error_cooldown_sec; if (first_async_drop_in_queue == tables_marked_dropped.end()) --first_async_drop_in_queue; @@ -1342,7 +1334,7 @@ void DatabaseCatalog::dropTablesParallel(std::vectorscheduleAfter(unused_dir_cleanup_period_sec * 1000); + (*cleanup_task)->scheduleAfter(static_cast(getContext()->getServerSettings().database_catalog_unused_dir_cleanup_period_sec) * 1000); } bool DatabaseCatalog::maybeRemoveDirectory(const String & disk_name, const DiskPtr & disk, const String & unused_dir) @@ -1742,7 +1734,7 @@ bool DatabaseCatalog::maybeRemoveDirectory(const String & disk_name, const DiskP time_t current_time = time(nullptr); if (st.st_mode & (S_IRWXU | S_IRWXG | S_IRWXO)) { - if (current_time <= max_modification_time + unused_dir_hide_timeout_sec) + if (current_time <= max_modification_time + static_cast(getContext()->getServerSettings().database_catalog_unused_dir_hide_timeout_sec)) return false; LOG_INFO(log, "Removing access rights for unused directory {} from disk {} (will remove it when timeout exceed)", unused_dir, disk_name); @@ -1758,6 +1750,8 @@ bool DatabaseCatalog::maybeRemoveDirectory(const String & disk_name, const DiskP } else { + auto unused_dir_rm_timeout_sec = static_cast(getContext()->getServerSettings().database_catalog_unused_dir_rm_timeout_sec); + if (!unused_dir_rm_timeout_sec) return false; diff --git a/src/Interpreters/DatabaseCatalog.h b/src/Interpreters/DatabaseCatalog.h index 23e38a6445e..83a302f117d 100644 --- a/src/Interpreters/DatabaseCatalog.h +++ b/src/Interpreters/DatabaseCatalog.h @@ -354,23 +354,8 @@ private: mutable std::mutex tables_marked_dropped_mutex; std::unique_ptr drop_task; - static constexpr time_t default_drop_delay_sec = 8 * 60; - time_t drop_delay_sec = default_drop_delay_sec; std::condition_variable 
wait_table_finally_dropped; - std::unique_ptr cleanup_task; - static constexpr time_t default_unused_dir_hide_timeout_sec = 60 * 60; /// 1 hour - time_t unused_dir_hide_timeout_sec = default_unused_dir_hide_timeout_sec; - static constexpr time_t default_unused_dir_rm_timeout_sec = 30 * 24 * 60 * 60; /// 30 days - time_t unused_dir_rm_timeout_sec = default_unused_dir_rm_timeout_sec; - static constexpr time_t default_unused_dir_cleanup_period_sec = 24 * 60 * 60; /// 1 day - time_t unused_dir_cleanup_period_sec = default_unused_dir_cleanup_period_sec; - - static constexpr time_t default_drop_error_cooldown_sec = 5; - time_t drop_error_cooldown_sec = default_drop_error_cooldown_sec; - - static constexpr size_t default_drop_table_concurrency = 10; - size_t drop_table_concurrency = default_drop_table_concurrency; std::unique_ptr reload_disks_task; std::mutex reload_disks_mutex; diff --git a/src/Interpreters/InterpreterDropQuery.cpp b/src/Interpreters/InterpreterDropQuery.cpp index bad3e5277db..d8056ddd1a3 100644 --- a/src/Interpreters/InterpreterDropQuery.cpp +++ b/src/Interpreters/InterpreterDropQuery.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -424,18 +425,28 @@ BlockIO InterpreterDropQuery::executeToDatabaseImpl(const ASTDropQuery & query, auto table_context = Context::createCopy(getContext()); table_context->setInternalQuery(true); /// Do not hold extra shared pointers to tables - std::vector> tables_to_drop; + std::vector> tables_to_drop; // NOTE: This means we wait for all tables to be loaded inside getTablesIterator() call in case of `async_load_databases = true`. for (auto iterator = database->getTablesIterator(table_context); iterator->isValid(); iterator->next()) { auto table_ptr = iterator->table(); - table_ptr->flushAndPrepareForShutdown(); - tables_to_drop.push_back({iterator->name(), table_ptr->isDictionary()}); + tables_to_drop.push_back({table_ptr->getStorageID(), table_ptr->isDictionary()}); } + /// Prepare tables for shutdown in parallel. 
+ ThreadPoolCallbackRunnerLocal runner(getDatabaseCatalogDropTablesThreadPool().get(), "DropTables"); + for (const auto & [name, _] : tables_to_drop) + { + auto table_ptr = DatabaseCatalog::instance().getTable(name, table_context); + runner([my_table_ptr = std::move(table_ptr)](){ + my_table_ptr->flushAndPrepareForShutdown(); + }); + } + runner.waitForAllToFinishAndRethrowFirstError(); + for (const auto & table : tables_to_drop) { - query_for_table.setTable(table.first); + query_for_table.setTable(table.first.getTableName()); query_for_table.is_dictionary = table.second; DatabasePtr db; UUID table_to_wait = UUIDHelpers::Nil; From 1427b16689601d7dd29d26de99b233c132905fde Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Fri, 26 Jul 2024 21:44:15 +0200 Subject: [PATCH 430/661] Fixed style --- src/Interpreters/DatabaseCatalog.cpp | 3 --- src/Interpreters/InterpreterDropQuery.cpp | 3 ++- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/src/Interpreters/DatabaseCatalog.cpp b/src/Interpreters/DatabaseCatalog.cpp index f64f8a06f38..30b151eb81d 100644 --- a/src/Interpreters/DatabaseCatalog.cpp +++ b/src/Interpreters/DatabaseCatalog.cpp @@ -50,9 +50,6 @@ namespace CurrentMetrics { extern const Metric TablesToDropQueueSize; - extern const Metric DatabaseCatalogThreads; - extern const Metric DatabaseCatalogThreadsActive; - extern const Metric DatabaseCatalogThreadsScheduled; } namespace DB diff --git a/src/Interpreters/InterpreterDropQuery.cpp b/src/Interpreters/InterpreterDropQuery.cpp index d8056ddd1a3..ef560ec3405 100644 --- a/src/Interpreters/InterpreterDropQuery.cpp +++ b/src/Interpreters/InterpreterDropQuery.cpp @@ -438,7 +438,8 @@ BlockIO InterpreterDropQuery::executeToDatabaseImpl(const ASTDropQuery & query, for (const auto & [name, _] : tables_to_drop) { auto table_ptr = DatabaseCatalog::instance().getTable(name, table_context); - runner([my_table_ptr = std::move(table_ptr)](){ + runner([my_table_ptr = std::move(table_ptr)]() + { my_table_ptr->flushAndPrepareForShutdown(); }); } From 1096a4ff33497c64eb786f6dbb603b18ccf804b1 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Mon, 29 Jul 2024 13:37:36 +0000 Subject: [PATCH 431/661] Fixed occasional LOGICAL_ERROR --- src/Interpreters/DatabaseCatalog.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Interpreters/DatabaseCatalog.cpp b/src/Interpreters/DatabaseCatalog.cpp index 30b151eb81d..eaf8cf1cc82 100644 --- a/src/Interpreters/DatabaseCatalog.cpp +++ b/src/Interpreters/DatabaseCatalog.cpp @@ -1336,7 +1336,7 @@ void DatabaseCatalog::dropTablesParallel(std::vector Date: Tue, 30 Jul 2024 00:39:16 +0200 Subject: [PATCH 432/661] Dont throw --- src/Interpreters/DatabaseCatalog.cpp | 30 ++++++++++------------------ 1 file changed, 11 insertions(+), 19 deletions(-) diff --git a/src/Interpreters/DatabaseCatalog.cpp b/src/Interpreters/DatabaseCatalog.cpp index eaf8cf1cc82..98526e5c1cd 100644 --- a/src/Interpreters/DatabaseCatalog.cpp +++ b/src/Interpreters/DatabaseCatalog.cpp @@ -1329,26 +1329,10 @@ void DatabaseCatalog::dropTablesParallel(std::vector Date: Mon, 29 Jul 2024 23:10:13 +0000 Subject: [PATCH 433/661] Fix Dwarf range list parsing in stack symbolizer --- src/Common/Dwarf.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/Common/Dwarf.cpp b/src/Common/Dwarf.cpp index 8439c01b22c..1f22e3b05aa 100644 --- a/src/Common/Dwarf.cpp +++ b/src/Common/Dwarf.cpp @@ -1559,8 +1559,7 @@ bool Dwarf::isAddrInRangeList(const CompilationUnit & cu, auto sp_start = addr_.substr(*cu.addr_base 
+ index_start * sizeof(uint64_t)); auto start = read(sp_start); - auto sp_end = addr_.substr(*cu.addr_base + index_start * sizeof(uint64_t) + length); - auto end = read(sp_end); + auto end = start + length; if (start != end && address >= start && address < end) { return true; From 8f920d064ccca8ed8d9341b10d54ebef7500484c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 30 Jul 2024 04:05:43 +0200 Subject: [PATCH 434/661] Fix inconsistent formatting of CODEC and STATISTICS --- src/Parsers/ASTFunction.cpp | 4 +++- src/Parsers/ASTFunction.h | 2 ++ src/Parsers/ExpressionElementParsers.cpp | 2 ++ src/Parsers/FunctionSecretArgumentsFinderAST.h | 4 +++- src/Parsers/IAST.h | 1 + src/Storages/StatisticsDescription.cpp | 1 + 6 files changed, 12 insertions(+), 2 deletions(-) diff --git a/src/Parsers/ASTFunction.cpp b/src/Parsers/ASTFunction.cpp index 230d4c778e8..cd9e910d45a 100644 --- a/src/Parsers/ASTFunction.cpp +++ b/src/Parsers/ASTFunction.cpp @@ -285,6 +285,8 @@ static bool formatNamedArgWithHiddenValue(IAST * arg, const IAST::FormatSettings void ASTFunction::formatImplWithoutAlias(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const { frame.expression_list_prepend_whitespace = false; + if (kind == Kind::CODEC || kind == Kind::STATISTICS || kind == Kind::BACKUP_NAME) + frame.allow_operators = false; FormatStateStacked nested_need_parens = frame; FormatStateStacked nested_dont_need_parens = frame; nested_need_parens.need_parens = true; @@ -308,7 +310,7 @@ void ASTFunction::formatImplWithoutAlias(const FormatSettings & settings, Format /// Should this function to be written as operator? bool written = false; - if (arguments && !parameters && nulls_action == NullsAction::EMPTY) + if (arguments && !parameters && frame.allow_operators && nulls_action == NullsAction::EMPTY) { /// Unary prefix operators. 
if (arguments->children.size() == 1) diff --git a/src/Parsers/ASTFunction.h b/src/Parsers/ASTFunction.h index be2b6beae54..1b4a5928d1c 100644 --- a/src/Parsers/ASTFunction.h +++ b/src/Parsers/ASTFunction.h @@ -58,6 +58,8 @@ public: TABLE_ENGINE, DATABASE_ENGINE, BACKUP_NAME, + CODEC, + STATISTICS, }; Kind kind = Kind::ORDINARY_FUNCTION; diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp index 865d07faaa7..9927acdcf17 100644 --- a/src/Parsers/ExpressionElementParsers.cpp +++ b/src/Parsers/ExpressionElementParsers.cpp @@ -696,6 +696,7 @@ bool ParserCodec::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) auto function_node = std::make_shared(); function_node->name = "CODEC"; + function_node->kind = ASTFunction::Kind::CODEC; function_node->arguments = expr_list_args; function_node->children.push_back(function_node->arguments); @@ -723,6 +724,7 @@ bool ParserStatisticsType::parseImpl(Pos & pos, ASTPtr & node, Expected & expect auto function_node = std::make_shared(); function_node->name = "STATISTICS"; + function_node->kind = ASTFunction::Kind::STATISTICS; function_node->arguments = stat_type; function_node->children.push_back(function_node->arguments); node = function_node; diff --git a/src/Parsers/FunctionSecretArgumentsFinderAST.h b/src/Parsers/FunctionSecretArgumentsFinderAST.h index 5b77485afb0..94da30922cc 100644 --- a/src/Parsers/FunctionSecretArgumentsFinderAST.h +++ b/src/Parsers/FunctionSecretArgumentsFinderAST.h @@ -33,7 +33,9 @@ public: { case ASTFunction::Kind::ORDINARY_FUNCTION: findOrdinaryFunctionSecretArguments(); break; case ASTFunction::Kind::WINDOW_FUNCTION: break; - case ASTFunction::Kind::LAMBDA_FUNCTION: break; + case ASTFunction::Kind::LAMBDA_FUNCTION: break; + case ASTFunction::Kind::CODEC: break; + case ASTFunction::Kind::STATISTICS: break; case ASTFunction::Kind::TABLE_ENGINE: findTableEngineSecretArguments(); break; case ASTFunction::Kind::DATABASE_ENGINE: findDatabaseEngineSecretArguments(); break; case ASTFunction::Kind::BACKUP_NAME: findBackupNameSecretArguments(); break; diff --git a/src/Parsers/IAST.h b/src/Parsers/IAST.h index d70c1cd0b6c..e2cf7579667 100644 --- a/src/Parsers/IAST.h +++ b/src/Parsers/IAST.h @@ -256,6 +256,7 @@ public: bool expression_list_always_start_on_new_line = false; /// Line feed and indent before expression list even if it's of single element. bool expression_list_prepend_whitespace = false; /// Prepend whitespace (if it is required) bool surround_each_list_element_with_parens = false; + bool allow_operators = true; /// Format some functions, such as "plus", "in", etc. as operators. 
size_t list_element_index = 0; const IAST * current_select = nullptr; }; diff --git a/src/Storages/StatisticsDescription.cpp b/src/Storages/StatisticsDescription.cpp index 9c5fd3604b2..63c849e3806 100644 --- a/src/Storages/StatisticsDescription.cpp +++ b/src/Storages/StatisticsDescription.cpp @@ -193,6 +193,7 @@ ASTPtr ColumnStatisticsDescription::getAST() const { auto function_node = std::make_shared(); function_node->name = "STATISTICS"; + function_node->kind = ASTFunction::Kind::STATISTICS; function_node->arguments = std::make_shared(); for (const auto & [type, desc] : types_to_desc) { From c1e7b7be89f5c5e39318ab093b00fa6cd8114ff3 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 30 Jul 2024 04:07:46 +0200 Subject: [PATCH 435/661] Add a test --- ...nconsistent_formatting_of_codecs_statistics.reference | 2 ++ ...03214_inconsistent_formatting_of_codecs_statistics.sh | 9 +++++++++ 2 files changed, 11 insertions(+) create mode 100644 tests/queries/0_stateless/03214_inconsistent_formatting_of_codecs_statistics.reference create mode 100755 tests/queries/0_stateless/03214_inconsistent_formatting_of_codecs_statistics.sh diff --git a/tests/queries/0_stateless/03214_inconsistent_formatting_of_codecs_statistics.reference b/tests/queries/0_stateless/03214_inconsistent_formatting_of_codecs_statistics.reference new file mode 100644 index 00000000000..7213baa3e5b --- /dev/null +++ b/tests/queries/0_stateless/03214_inconsistent_formatting_of_codecs_statistics.reference @@ -0,0 +1,2 @@ +ALTER TABLE t MODIFY COLUMN `c` CODEC(in(1, 2)) +ALTER TABLE t MODIFY COLUMN `c` STATISTICS(plus(1, 2)) diff --git a/tests/queries/0_stateless/03214_inconsistent_formatting_of_codecs_statistics.sh b/tests/queries/0_stateless/03214_inconsistent_formatting_of_codecs_statistics.sh new file mode 100755 index 00000000000..c3f8d89b9a4 --- /dev/null +++ b/tests/queries/0_stateless/03214_inconsistent_formatting_of_codecs_statistics.sh @@ -0,0 +1,9 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +# Ensure that these (possibly incorrect) queries can at least be parsed back after formatting. 
+$CLICKHOUSE_FORMAT --oneline --query "ALTER TABLE t MODIFY COLUMN c CODEC(in(1, 2))" | $CLICKHOUSE_FORMAT --oneline +$CLICKHOUSE_FORMAT --oneline --query "ALTER TABLE t MODIFY COLUMN c STATISTICS(plus(1, 2))" | $CLICKHOUSE_FORMAT --oneline From cb6b6329c8e763f61f70797a95dab8ef24fd47d1 Mon Sep 17 00:00:00 2001 From: kevinyhzou Date: Tue, 30 Jul 2024 10:45:36 +0800 Subject: [PATCH 436/661] add session timezone settings --- tests/queries/0_stateless/03198_orc_read_time_zone.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/03198_orc_read_time_zone.sh b/tests/queries/0_stateless/03198_orc_read_time_zone.sh index 7e931e16e48..7d1da0c1579 100755 --- a/tests/queries/0_stateless/03198_orc_read_time_zone.sh +++ b/tests/queries/0_stateless/03198_orc_read_time_zone.sh @@ -8,5 +8,5 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) $CLICKHOUSE_CLIENT -q "drop table if exists test_orc_read_timezone" $CLICKHOUSE_CLIENT -q "create table test_orc_read_timezone(id UInt64, t DateTime64) Engine=MergeTree order by id" $CLICKHOUSE_CLIENT -q "insert into test_orc_read_timezone from infile '$CURDIR/data_orc/test_reader_time_zone.snappy.orc' SETTINGS input_format_orc_reader_time_zone_name='Asia/Shanghai' FORMAT ORC" -$CLICKHOUSE_CLIENT -q "select * from test_orc_read_timezone" +$CLICKHOUSE_CLIENT -q "select * from test_orc_read_timezone SETTINGS session_timezone='Asia/Shanghai'" $CLICKHOUSE_CLIENT -q "drop table test_orc_read_timezone" \ No newline at end of file From dd5819ab6ab2df3231737a4808d0445ad5345555 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 30 Jul 2024 05:16:09 +0200 Subject: [PATCH 437/661] Changelog sanity --- CHANGELOG.md | 55 +++++++++++++++++++++++++--------------------------- 1 file changed, 26 insertions(+), 29 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 620b7c99bac..06f7bcdd84e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,6 +18,7 @@ * Metric `KeeperOutstandingRequets` was renamed to `KeeperOutstandingRequests`. [#66206](https://github.com/ClickHouse/ClickHouse/pull/66206) ([Robert Schulze](https://github.com/rschu1ze)). * Remove `is_deterministic` field from the `system.functions` table. [#66630](https://github.com/ClickHouse/ClickHouse/pull/66630) ([Alexey Milovidov](https://github.com/alexey-milovidov)). * Function `tuple` will now try to construct named tuples in query (controlled by `enable_named_columns_in_function_tuple`). Introduce function `tupleNames` to extract names from tuples. [#54881](https://github.com/ClickHouse/ClickHouse/pull/54881) ([Amos Bird](https://github.com/amosbird)). +* Change how deduplication for Materialized Views works. Fixed a lot of cases like: - on destination table: data is split for 2 or more blocks and that blocks is considered as duplicate when that block is inserted in parallel. - on MV destination table: the equal blocks are deduplicated, that happens when MV often produces equal data as a result for different input data due to performing aggregation. - on MV destination table: the equal blocks which comes from different MV are deduplicated. [#61601](https://github.com/ClickHouse/ClickHouse/pull/61601) ([Sema Checherinda](https://github.com/CheSema)). #### New Feature * Add `ASOF JOIN` support for `full_sorting_join` algorithm. [#55051](https://github.com/ClickHouse/ClickHouse/pull/55051) ([vdimir](https://github.com/vdimir)). @@ -32,13 +33,14 @@ * Add a new setting to disable/enable writing page index into parquet files. 
[#65475](https://github.com/ClickHouse/ClickHouse/pull/65475) ([lgbo](https://github.com/lgbo-ustc)). * Introduce `logger.console_log_level` server config to control the log level to the console (if enabled). [#65559](https://github.com/ClickHouse/ClickHouse/pull/65559) ([Azat Khuzhin](https://github.com/azat)). * Automatically append a wildcard `*` to the end of a directory path with table function `file`. [#66019](https://github.com/ClickHouse/ClickHouse/pull/66019) ([Zhidong (David) Guo](https://github.com/Gun9niR)). -* Add `--memory-usage` option to client in non interactive mode. [#66393](https://github.com/ClickHouse/ClickHouse/pull/66393) ([vdimir](https://github.com/vdimir)). +* Add `--memory-usage` option to client in non-interactive mode. [#66393](https://github.com/ClickHouse/ClickHouse/pull/66393) ([vdimir](https://github.com/vdimir)). * Make an interactive client for clickhouse-disks, add local disk from the local directory. [#64446](https://github.com/ClickHouse/ClickHouse/pull/64446) ([Daniil Ivanik](https://github.com/divanik)). * When lightweight delete happens on a table with projection(s), users have choices either throw an exception (by default) or drop the projection [#65594](https://github.com/ClickHouse/ClickHouse/pull/65594) ([jsc0218](https://github.com/jsc0218)). +* Add system tables with main information about all detached tables. [#65400](https://github.com/ClickHouse/ClickHouse/pull/65400) ([Konstantin Morozov](https://github.com/k-morozov)). #### Experimental Feature * Change binary serialization of Variant data type: add `compact` mode to avoid writing the same discriminator multiple times for granules with single variant or with only NULL values. Add MergeTree setting `use_compact_variant_discriminators_serialization` that is enabled by default. Note that Variant type is still experimental and backward-incompatible change in serialization is ok. [#62774](https://github.com/ClickHouse/ClickHouse/pull/62774) ([Kruglov Pavel](https://github.com/Avogar)). -* Support rocksdb as backend storage of keeper. [#56626](https://github.com/ClickHouse/ClickHouse/pull/56626) ([Han Fei](https://github.com/hanfei1991)). +* Support rocksdb as backend storage of clickhouse-keeper. [#56626](https://github.com/ClickHouse/ClickHouse/pull/56626) ([Han Fei](https://github.com/hanfei1991)). * Refactor JSONExtract functions, support more types including experimental Dynamic type. [#66046](https://github.com/ClickHouse/ClickHouse/pull/66046) ([Kruglov Pavel](https://github.com/Avogar)). * Support null map subcolumn for Variant and Dynamic subcolumns. [#66178](https://github.com/ClickHouse/ClickHouse/pull/66178) ([Kruglov Pavel](https://github.com/Avogar)). * Fix reading dynamic subcolumns from altered Memory table. Previously if `max_types` parameter of a Dynamic type was changed in Memory table via alter, further subcolumns reading can return wrong result. [#66066](https://github.com/ClickHouse/ClickHouse/pull/66066) ([Kruglov Pavel](https://github.com/Avogar)). @@ -46,8 +48,8 @@ #### Performance Improvement * Replace int to string algorithm with a faster one (from a modified amdn/itoa to a modified jeaiii/itoa). [#61661](https://github.com/ClickHouse/ClickHouse/pull/61661) ([Raúl Marín](https://github.com/Algunenano)). -* Sizes of hash tables created by join (`parallel_hash` algorithm) is collected and cached now. This information will be used to preallocate space in hash tables for subsequent query executions and save time on hash table resizes. 
[#64553](https://github.com/ClickHouse/ClickHouse/pull/64553) ([Nikita Taranov](https://github.com/nickitat)). -* Optimized queries with `ORDER BY` primary key and `WHERE` that have a condition with high selectivity by using of buffering. It is controlled by setting `read_in_order_use_buffering` (enabled by default) and can increase memory usage of query. [#64607](https://github.com/ClickHouse/ClickHouse/pull/64607) ([Anton Popov](https://github.com/CurtizJ)). +* Sizes of hash tables created by join (`parallel_hash` algorithm) are collected and cached now. This information will be used to preallocate space in hash tables for subsequent query executions and save time on hash table resizes. [#64553](https://github.com/ClickHouse/ClickHouse/pull/64553) ([Nikita Taranov](https://github.com/nickitat)). +* Optimized queries with `ORDER BY` primary key and `WHERE` that have a condition with high selectivity by using buffering. It is controlled by setting `read_in_order_use_buffering` (enabled by default) and can increase memory usage of query. [#64607](https://github.com/ClickHouse/ClickHouse/pull/64607) ([Anton Popov](https://github.com/CurtizJ)). * Improve performance of loading `plain_rewritable` metadata. [#65634](https://github.com/ClickHouse/ClickHouse/pull/65634) ([Alexey Milovidov](https://github.com/alexey-milovidov)). * Attaching tables on read-only disks will use fewer resources by not loading outdated parts. [#65635](https://github.com/ClickHouse/ClickHouse/pull/65635) ([Alexey Milovidov](https://github.com/alexey-milovidov)). * Support minmax hyperrectangle for Set indices. [#65676](https://github.com/ClickHouse/ClickHouse/pull/65676) ([AntiTopQuark](https://github.com/AntiTopQuark)). @@ -59,11 +61,11 @@ * DatabaseCatalog drops tables faster by using up to database_catalog_drop_table_concurrency threads. [#66065](https://github.com/ClickHouse/ClickHouse/pull/66065) ([Sema Checherinda](https://github.com/CheSema)). #### Improvement +* Improved ZooKeeper load balancing. The current session doesn't expire until the optimal nodes become available despite `fallback_session_lifetime`. Added support for AZ-aware balancing. [#65570](https://github.com/ClickHouse/ClickHouse/pull/65570) ([Alexander Tokmakov](https://github.com/tavplubix)). * The setting `optimize_trivial_insert_select` is disabled by default. In most cases, it should be beneficial. Nevertheless, if you are seeing slower INSERT SELECT or increased memory usage, you can enable it back or `SET compatibility = '24.6'`. [#58970](https://github.com/ClickHouse/ClickHouse/pull/58970) ([Alexey Milovidov](https://github.com/alexey-milovidov)). * Print stacktrace and diagnostic info if `clickhouse-client` or `clickhouse-local` crashes. [#61109](https://github.com/ClickHouse/ClickHouse/pull/61109) ([Alexander Tokmakov](https://github.com/tavplubix)). * The result of `SHOW INDEX | INDEXES | INDICES | KEYS` was previously sorted by the primary key column names. Since this was unintuitive, the result is now sorted by the position of the primary key columns within the primary key. [#61131](https://github.com/ClickHouse/ClickHouse/pull/61131) ([Robert Schulze](https://github.com/rschu1ze)). -* Change how deduplication for Materialized Views works. Fixed a lot of cases like: - on destination table: data is split for 2 or more blocks and that blocks is considered as duplicate when that block is inserted in parallel. 
- on MV destination table: the equal blocks are deduplicated, that happens when MV often produces equal data as a result for different input data due to performing aggregation. - on MV destination table: the equal blocks which comes from different MV are deduplicated. [#61601](https://github.com/ClickHouse/ClickHouse/pull/61601) ([Sema Checherinda](https://github.com/CheSema)). -* Allow matching column names in a case insensitive manner when reading json files (`input_format_json_case_insensitive_column_matching`). [#61750](https://github.com/ClickHouse/ClickHouse/pull/61750) ([kevinyhzou](https://github.com/KevinyhZou)). +* Allow matching column names in a case-insensitive manner when reading json files (`input_format_json_case_insensitive_column_matching`). [#61750](https://github.com/ClickHouse/ClickHouse/pull/61750) ([kevinyhzou](https://github.com/KevinyhZou)). * Support reading partitioned data DeltaLake data. Infer DeltaLake schema by reading metadata instead of data. [#63201](https://github.com/ClickHouse/ClickHouse/pull/63201) ([Kseniia Sumarokova](https://github.com/kssenii)). * In composable protocols TLS layer accepted only `certificateFile` and `privateKeyFile` parameters. https://clickhouse.com/docs/en/operations/settings/composable-protocols. [#63985](https://github.com/ClickHouse/ClickHouse/pull/63985) ([Anton Ivashkin](https://github.com/ianton-ru)). * Added profile event `SelectQueriesWithPrimaryKeyUsage` which indicates how many SELECT queries use the primary key to evaluate the WHERE clause. [#64492](https://github.com/ClickHouse/ClickHouse/pull/64492) ([0x01f](https://github.com/0xfei)). @@ -71,7 +73,6 @@ * Support aliases in parametrized view function (only new analyzer). [#65190](https://github.com/ClickHouse/ClickHouse/pull/65190) ([Kseniia Sumarokova](https://github.com/kssenii)). * Updated to mask account key in logs in azureBlobStorage. [#65273](https://github.com/ClickHouse/ClickHouse/pull/65273) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). * Partition pruning for `IN` predicates when filter expression is a part of `PARTITION BY` expression. [#65335](https://github.com/ClickHouse/ClickHouse/pull/65335) ([Eduard Karacharov](https://github.com/korowa)). -* Add system tables with main information about all detached tables. [#65400](https://github.com/ClickHouse/ClickHouse/pull/65400) ([Konstantin Morozov](https://github.com/k-morozov)). * `arrayMin`/`arrayMax` can be applicable to all data types that are comparable. [#65455](https://github.com/ClickHouse/ClickHouse/pull/65455) ([pn](https://github.com/chloro-pn)). * Improved memory accounting for cgroups v2 to exclude the amount occupied by the page cache. [#65470](https://github.com/ClickHouse/ClickHouse/pull/65470) ([Nikita Taranov](https://github.com/nickitat)). * Do not create format settings for each row when serializing chunks to insert to EmbeddedRocksDB table. [#65474](https://github.com/ClickHouse/ClickHouse/pull/65474) ([Duc Canh Le](https://github.com/canhld94)). @@ -80,36 +81,35 @@ * Disable filesystem cache background download by default. It will be enabled back when we fix the issue with possible "Memory limit exceeded" because memory deallocation is done outside of query context (while buffer is allocated inside of query context) if we use background download threads. Plus we need to add a separate setting to define max size to download for background workers (currently it is limited by max_file_segment_size, which might be too big). 
[#65534](https://github.com/ClickHouse/ClickHouse/pull/65534) ([Kseniia Sumarokova](https://github.com/kssenii)). * Add new option to config `` which allow to specify how often clickhouse will reload config. [#65545](https://github.com/ClickHouse/ClickHouse/pull/65545) ([alesapin](https://github.com/alesapin)). * Implement binary encoding for ClickHouse data types and add its specification in docs. Use it in Dynamic binary serialization, allow to use it in RowBinaryWithNamesAndTypes and Native formats under settings. [#65546](https://github.com/ClickHouse/ClickHouse/pull/65546) ([Kruglov Pavel](https://github.com/Avogar)). -* Improved ZooKeeper load balancing. The current session doesn't expire until the optimal nodes become available despite `fallback_session_lifetime`. Added support for AZ-aware balancing. [#65570](https://github.com/ClickHouse/ClickHouse/pull/65570) ([Alexander Tokmakov](https://github.com/tavplubix)). * Server settings `compiled_expression_cache_size` and `compiled_expression_cache_elements_size` are now shown in `system.server_settings`. [#65584](https://github.com/ClickHouse/ClickHouse/pull/65584) ([Robert Schulze](https://github.com/rschu1ze)). * Add support for user identification based on x509 SubjectAltName extension. [#65626](https://github.com/ClickHouse/ClickHouse/pull/65626) ([Anton Kozlov](https://github.com/tonickkozlov)). * `clickhouse-local` will respect the `max_server_memory_usage` and `max_server_memory_usage_to_ram_ratio` from the configuration file. It will also set the max memory usage to 90% of the system memory by default, like `clickhouse-server` does. [#65697](https://github.com/ClickHouse/ClickHouse/pull/65697) ([Alexey Milovidov](https://github.com/alexey-milovidov)). * Add a script to backup your files to ClickHouse. [#65699](https://github.com/ClickHouse/ClickHouse/pull/65699) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* PostgreSQL source support cancel. [#65722](https://github.com/ClickHouse/ClickHouse/pull/65722) ([Maksim Kita](https://github.com/kitaisreal)). -* Make allow_experimental_analyzer be controlled by the initiator for distributed queries. This ensures compatibility and correctness during operations in mixed version clusters. [#65777](https://github.com/ClickHouse/ClickHouse/pull/65777) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* PostgreSQL source to support query cancellations. [#65722](https://github.com/ClickHouse/ClickHouse/pull/65722) ([Maksim Kita](https://github.com/kitaisreal)). +* Make `allow_experimental_analyzer` be controlled by the initiator for distributed queries. This ensures compatibility and correctness during operations in mixed version clusters. [#65777](https://github.com/ClickHouse/ClickHouse/pull/65777) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). * Respect cgroup CPU limit in Keeper. [#65819](https://github.com/ClickHouse/ClickHouse/pull/65819) ([Antonio Andelic](https://github.com/antonio2368)). -* Allow to use `concat` function with empty arguments ``` sql :) select concat();. [#65887](https://github.com/ClickHouse/ClickHouse/pull/65887) ([李扬](https://github.com/taiyang-li)). -* Allow controlling named collections in clickhouse-local. [#65973](https://github.com/ClickHouse/ClickHouse/pull/65973) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Improve Azure profile events. [#65999](https://github.com/ClickHouse/ClickHouse/pull/65999) ([alesapin](https://github.com/alesapin)). -* Support ORC file read by writer time zone. 
[#66025](https://github.com/ClickHouse/ClickHouse/pull/66025) ([kevinyhzou](https://github.com/KevinyhZou)). -* Add settings to control connection to the PostgreSQL. * Setting `postgresql_connection_attempt_timeout` specifies the value passed to `connect_timeout` parameter of connection URL. * Setting `postgresql_connection_pool_retries` specifies the number of retries to establish a connection to the PostgreSQL end-point. [#66232](https://github.com/ClickHouse/ClickHouse/pull/66232) ([Dmitry Novik](https://github.com/novikd)). -* Reduce inaccuracy of input_wait_elapsed_us/input_wait_elapsed_us/elapsed_us. [#66239](https://github.com/ClickHouse/ClickHouse/pull/66239) ([Azat Khuzhin](https://github.com/azat)). -* Improve FilesystemCache ProfileEvents. [#66249](https://github.com/ClickHouse/ClickHouse/pull/66249) ([zhukai](https://github.com/nauu)). -* Add settings to ignore ON CLUSTER clause in queries for named collection management with replicated storage. [#66288](https://github.com/ClickHouse/ClickHouse/pull/66288) ([MikhailBurdukov](https://github.com/MikhailBurdukov)). +* Allow to use `concat` function with empty arguments `:) select concat();`. [#65887](https://github.com/ClickHouse/ClickHouse/pull/65887) ([李扬](https://github.com/taiyang-li)). +* Allow controlling named collections in `clickhouse-local`. [#65973](https://github.com/ClickHouse/ClickHouse/pull/65973) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Improve Azure-related profile events. [#65999](https://github.com/ClickHouse/ClickHouse/pull/65999) ([alesapin](https://github.com/alesapin)). +* Support ORC file read by writer's time zone. [#66025](https://github.com/ClickHouse/ClickHouse/pull/66025) ([kevinyhzou](https://github.com/KevinyhZou)). +* Add settings to control connections to PostgreSQL. The setting `postgresql_connection_attempt_timeout` specifies the value passed to `connect_timeout` parameter of connection URL. The setting `postgresql_connection_pool_retries` specifies the number of retries to establish a connection to the PostgreSQL end-point. [#66232](https://github.com/ClickHouse/ClickHouse/pull/66232) ([Dmitry Novik](https://github.com/novikd)). +* Reduce inaccuracy of `input_wait_elapsed_us`/`elapsed_us` in the `system.processors_profile_log`. [#66239](https://github.com/ClickHouse/ClickHouse/pull/66239) ([Azat Khuzhin](https://github.com/azat)). +* Improve ProfileEvents for the filesystem cache. [#66249](https://github.com/ClickHouse/ClickHouse/pull/66249) ([zhukai](https://github.com/nauu)). +* Add settings to ignore the `ON CLUSTER` clause in queries for named collection management with the replicated storage. [#66288](https://github.com/ClickHouse/ClickHouse/pull/66288) ([MikhailBurdukov](https://github.com/MikhailBurdukov)). * Function `generateSnowflakeID` now allows to specify a machine ID as a parameter to prevent collisions in large clusters. [#66374](https://github.com/ClickHouse/ClickHouse/pull/66374) ([ZAWA_ll](https://github.com/Zawa-ll)). -* Disable suspending on Ctrl+Z in interactive mode. This is a common trap and is not expected behavior for almost all users. I imagine only a few extreme power users could appreciate suspending terminal applications to the background, but I don't know any. [#66511](https://github.com/ClickHouse/ClickHouse/pull/66511) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Add option for validating the Primary key type in Dictionaries. Without this option for simple layouts any column type will be implicitly converted to UInt64. 
### Documentation entry for user-facing changes. [#66595](https://github.com/ClickHouse/ClickHouse/pull/66595) ([MikhailBurdukov](https://github.com/MikhailBurdukov)). +* Disable suspending on `Ctrl+Z` in interactive mode. This is a common trap and is not expected behavior for almost all users. I imagine only a few extreme power users could appreciate suspending terminal applications to the background, but I don't know any. [#66511](https://github.com/ClickHouse/ClickHouse/pull/66511) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add option for validating the primary key type in Dictionaries. Without this option for simple layouts any column type will be implicitly converted to UInt64. [#66595](https://github.com/ClickHouse/ClickHouse/pull/66595) ([MikhailBurdukov](https://github.com/MikhailBurdukov)). #### Bug Fix (user-visible misbehavior in an official stable release) -* Fix unexpected size of low cardinality column in function calls. [#65298](https://github.com/ClickHouse/ClickHouse/pull/65298) ([Raúl Marín](https://github.com/Algunenano)). * Check cyclic dependencies on CREATE/REPLACE/RENAME/EXCHANGE queries and throw an exception if there is a cyclic dependency. Previously such cyclic dependencies could lead to a deadlock during server startup. Also fix some bugs in dependencies creation. [#65405](https://github.com/ClickHouse/ClickHouse/pull/65405) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix unexpected sizes of `LowCardinality` columns in function calls. [#65298](https://github.com/ClickHouse/ClickHouse/pull/65298) ([Raúl Marín](https://github.com/Algunenano)). * Fix crash in maxIntersections. [#65689](https://github.com/ClickHouse/ClickHouse/pull/65689) ([Raúl Marín](https://github.com/Algunenano)). -* Fix the VALID UNTIL clause in the user definition resetting after a restart. [#66409](https://github.com/ClickHouse/ClickHouse/pull/66409) ([Nikolay Degterinsky](https://github.com/evillique)). -* Fix SHOW MERGES remaining time. [#66735](https://github.com/ClickHouse/ClickHouse/pull/66735) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix the `VALID UNTIL` clause in the user definition resetting after a restart. [#66409](https://github.com/ClickHouse/ClickHouse/pull/66409) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fix the remaining time column in `SHOW MERGES`. [#66735](https://github.com/ClickHouse/ClickHouse/pull/66735) ([Alexey Milovidov](https://github.com/alexey-milovidov)). * `Query was cancelled` might have been printed twice in clickhouse-client. This behaviour is fixed. [#66005](https://github.com/ClickHouse/ClickHouse/pull/66005) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). -* Fixed crash while using MaterializedMySQL with TABLE OVERRIDE that maps MySQL NULL field into ClickHouse not NULL field. [#54649](https://github.com/ClickHouse/ClickHouse/pull/54649) ([Filipp Ozinov](https://github.com/bakwc)). -* Fix logical error when PREWHERE expression read no columns and table has no adaptive index granularity (very old table). [#59173](https://github.com/ClickHouse/ClickHouse/pull/59173) ([Alexander Gololobov](https://github.com/davenger)). -* Fix bug with cancellation buffer when canceling a query. [#64478](https://github.com/ClickHouse/ClickHouse/pull/64478) ([Sema Checherinda](https://github.com/CheSema)). +* Fixed crash while using `MaterializedMySQL` (which is an unsupported, experimental feature) with TABLE OVERRIDE that maps MySQL NULL field into ClickHouse not NULL field. 
[#54649](https://github.com/ClickHouse/ClickHouse/pull/54649) ([Filipp Ozinov](https://github.com/bakwc)). +* Fix logical error when `PREWHERE` expression read no columns and table has no adaptive index granularity (very old table). [#59173](https://github.com/ClickHouse/ClickHouse/pull/59173) ([Alexander Gololobov](https://github.com/davenger)). +* Fix bug with the cancellation buffer when canceling a query. [#64478](https://github.com/ClickHouse/ClickHouse/pull/64478) ([Sema Checherinda](https://github.com/CheSema)). * Fix filling parts columns from metadata (when columns.txt does not exists). [#64757](https://github.com/ClickHouse/ClickHouse/pull/64757) ([Azat Khuzhin](https://github.com/azat)). * Fix crash for `ALTER TABLE ... ON CLUSTER ... MODIFY SQL SECURITY`. [#64957](https://github.com/ClickHouse/ClickHouse/pull/64957) ([pufit](https://github.com/pufit)). * Fix crash on destroying AccessControl: add explicit shutdown. [#64993](https://github.com/ClickHouse/ClickHouse/pull/64993) ([Vitaly Baranov](https://github.com/vitlibar)). @@ -178,9 +178,6 @@ * Fix `indexHint` function case found by fuzzer. [#66286](https://github.com/ClickHouse/ClickHouse/pull/66286) ([Anton Popov](https://github.com/CurtizJ)). * Fix AST formatting of 'create table b empty as a'. [#64951](https://github.com/ClickHouse/ClickHouse/pull/64951) ([Michael Kolupaev](https://github.com/al13n321)). -#### Build/Testing/Packaging Improvement -* Instantiate template methods ahead in different .cpp files, avoid too large translation units during compiling. [#64818](https://github.com/ClickHouse/ClickHouse/pull/64818) ([lgbo](https://github.com/lgbo-ustc)). - ### ClickHouse release 24.6, 2024-07-01 #### Backward Incompatible Change From 3a7ffb3284003d853974baf12cf442bdc1105143 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 30 Jul 2024 05:19:10 +0200 Subject: [PATCH 438/661] Changelog sanity --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 06f7bcdd84e..9d1a63cb3a9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -156,7 +156,7 @@ * Fixed how the ClickHouse server detects the maximum number of usable CPU cores as specified by cgroups v2 if the server runs in a container such as Docker. In more detail, containers often run their process in the root cgroup which has an empty name. In that case, ClickHouse ignored the CPU limits set by cgroups v2. [#66237](https://github.com/ClickHouse/ClickHouse/pull/66237) ([filimonov](https://github.com/filimonov)). * Fix the `Not-ready set` error when a subquery with `IN` is used in the constraint. [#66261](https://github.com/ClickHouse/ClickHouse/pull/66261) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). * Fix error reporting while copying to S3 or AzureBlobStorage. [#66295](https://github.com/ClickHouse/ClickHouse/pull/66295) ([Vitaly Baranov](https://github.com/vitlibar)). -* Prevent watchdog from keeping descriptors of unlinked(rotated) log files. [#66334](https://github.com/ClickHouse/ClickHouse/pull/66334) ([Aleksei Filatov](https://github.com/aalexfvk)). +* Prevent watchdog from keeping descriptors of unlinked (rotated) log files. [#66334](https://github.com/ClickHouse/ClickHouse/pull/66334) ([Aleksei Filatov](https://github.com/aalexfvk)). * Fix the bug that logicalexpressionoptimizerpass lost logical type of constant. [#66344](https://github.com/ClickHouse/ClickHouse/pull/66344) ([pn](https://github.com/chloro-pn)). 
* Fix `Column identifier is already registered` error with `group_by_use_nulls=true` and new analyzer. [#66400](https://github.com/ClickHouse/ClickHouse/pull/66400) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). * Fix possible incorrect result for queries joining and filtering table external engine (like PostgreSQL), due to too aggressive filter pushdown. Since now, conditions from where section won't be send to external database in case of outer join with external table. [#66402](https://github.com/ClickHouse/ClickHouse/pull/66402) ([vdimir](https://github.com/vdimir)). From 368b9a058379c2e7902fd9ee7a21b664f0500df9 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 30 Jul 2024 05:29:36 +0200 Subject: [PATCH 439/661] Changelog sanity --- CHANGELOG.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9d1a63cb3a9..722ae4f8268 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -39,11 +39,11 @@ * Add system tables with main information about all detached tables. [#65400](https://github.com/ClickHouse/ClickHouse/pull/65400) ([Konstantin Morozov](https://github.com/k-morozov)). #### Experimental Feature -* Change binary serialization of Variant data type: add `compact` mode to avoid writing the same discriminator multiple times for granules with single variant or with only NULL values. Add MergeTree setting `use_compact_variant_discriminators_serialization` that is enabled by default. Note that Variant type is still experimental and backward-incompatible change in serialization is ok. [#62774](https://github.com/ClickHouse/ClickHouse/pull/62774) ([Kruglov Pavel](https://github.com/Avogar)). -* Support rocksdb as backend storage of clickhouse-keeper. [#56626](https://github.com/ClickHouse/ClickHouse/pull/56626) ([Han Fei](https://github.com/hanfei1991)). +* Change binary serialization of the `Variant` data type: add `compact` mode to avoid writing the same discriminator multiple times for granules with single variant or with only NULL values. Add MergeTree setting `use_compact_variant_discriminators_serialization` that is enabled by default. Note that Variant type is still experimental and backward-incompatible change in serialization is ok. [#62774](https://github.com/ClickHouse/ClickHouse/pull/62774) ([Kruglov Pavel](https://github.com/Avogar)). +* Support on-disk backend storage for clickhouse-keeper. [#56626](https://github.com/ClickHouse/ClickHouse/pull/56626) ([Han Fei](https://github.com/hanfei1991)). * Refactor JSONExtract functions, support more types including experimental Dynamic type. [#66046](https://github.com/ClickHouse/ClickHouse/pull/66046) ([Kruglov Pavel](https://github.com/Avogar)). -* Support null map subcolumn for Variant and Dynamic subcolumns. [#66178](https://github.com/ClickHouse/ClickHouse/pull/66178) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix reading dynamic subcolumns from altered Memory table. Previously if `max_types` parameter of a Dynamic type was changed in Memory table via alter, further subcolumns reading can return wrong result. [#66066](https://github.com/ClickHouse/ClickHouse/pull/66066) ([Kruglov Pavel](https://github.com/Avogar)). +* Support null map subcolumn for `Variant` and `Dynamic` subcolumns. [#66178](https://github.com/ClickHouse/ClickHouse/pull/66178) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix reading `Dynamic` subcolumns from altered `Memory` table. 
Previously if `max_types` parameter of a Dynamic type was changed in Memory table via alter, further subcolumns reading can return wrong result. [#66066](https://github.com/ClickHouse/ClickHouse/pull/66066) ([Kruglov Pavel](https://github.com/Avogar)). * Add support for `cluster_for_parallel_replicas` when using custom key parallel replicas. It allows you to use parallel replicas with custom key with MergeTree tables. [#65453](https://github.com/ClickHouse/ClickHouse/pull/65453) ([Antonio Andelic](https://github.com/antonio2368)). #### Performance Improvement From f90b88c978d87225a5cf5f66136714ff5535d69c Mon Sep 17 00:00:00 2001 From: Alexey Gerasimchuck Date: Tue, 30 Jul 2024 05:57:07 +0000 Subject: [PATCH 440/661] Reduced complexity of the test --- ...2832_alter_max_sessions_for_user.reference | 7 +-- .../02832_alter_max_sessions_for_user.sh | 62 ++++++++----------- 2 files changed, 30 insertions(+), 39 deletions(-) diff --git a/tests/queries/0_stateless/02832_alter_max_sessions_for_user.reference b/tests/queries/0_stateless/02832_alter_max_sessions_for_user.reference index f80f8738ff8..c2e103d61cb 100644 --- a/tests/queries/0_stateless/02832_alter_max_sessions_for_user.reference +++ b/tests/queries/0_stateless/02832_alter_max_sessions_for_user.reference @@ -1,8 +1,7 @@ -test_alter_profile case: max_session_count 1 alter_sessions_count 1 -test_alter_profile case: max_session_count 2 alter_sessions_count 1 +test_alter_profile case: max_sessions_for_user 1 +USER_SESSION_LIMIT_EXCEEDED +test_alter_profile case: max_sessions_for_user 2 USER_SESSION_LIMIT_EXCEEDED -test_alter_profile case: max_session_count 1 alter_sessions_count 2 -test_alter_profile case: max_session_count 2 alter_sessions_count 2 READONLY READONLY READONLY diff --git a/tests/queries/0_stateless/02832_alter_max_sessions_for_user.sh b/tests/queries/0_stateless/02832_alter_max_sessions_for_user.sh index 87fbffdb1e6..55f9e3e97a4 100755 --- a/tests/queries/0_stateless/02832_alter_max_sessions_for_user.sh +++ b/tests/queries/0_stateless/02832_alter_max_sessions_for_user.sh @@ -5,7 +5,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -SESSION_ID_PREFIX="02832_alter_max_sessions_session_$$" QUERY_ID_PREFIX="02832_alter_max_sessions_query_$$" PROFILE="02832_alter_max_sessions_profile_$$" USER="02832_alter_max_sessions_user_$$" @@ -17,48 +16,41 @@ ${CLICKHOUSE_CLIENT} -q $"DROP PROFILE IF EXISTS ${PROFILE}" ${CLICKHOUSE_CLIENT} -q $"CREATE SETTINGS PROFILE ${PROFILE}" ${CLICKHOUSE_CLIENT} -q $"CREATE USER '${USER}' SETTINGS PROFILE '${PROFILE}'" -function run_sessions_set() +function wait_for_query_to_start() { - local sessions_count="$1" - local session_check="$2" - for ((i = 1 ; i <= ${sessions_count} ; i++)); do - local session_id="${SESSION_ID_PREFIX}_${i}" - local query_id="${QUERY_ID_PREFIX}_${i}" - # Write only expected error text - # More than alter_sessions_count queries will not start. 
- ${CLICKHOUSE_CURL} -sS -X POST "${CLICKHOUSE_URL}&user=${USER}&query_id=${query_id}&session_id=${session_id}&session_check=${session_check}&session_timeout=600&function_sleep_max_microseconds_per_block=120000000" --data-binary "SELECT sleep(120)" | grep -o -m 1 'USER_SESSION_LIMIT_EXCEEDED' & + while [[ $($CLICKHOUSE_CLIENT -q "SELECT count() FROM system.processes WHERE query_id = '$1'") == 0 ]]; do sleep 0.2; done +} + +function test_alter_max_sessions_for_user() +{ + local max_sessions_for_user="$1" + echo $"test_alter_profile case: max_sessions_for_user ${max_sessions_for_user}" + + # Step 0: Set max_sessions_for_user. + ${CLICKHOUSE_CLIENT} -q $"ALTER SETTINGS PROFILE ${PROFILE} SETTINGS max_sessions_for_user = ${max_sessions_for_user}" + + # Step 1: Simulaneously run `max_sessions_for_user` queries. These queries should run without any problems. + for ((i = 1 ; i <= max_sessions_for_user ; i++)); do + local query_id="${QUERY_ID_PREFIX}_${i}_${max_sessions_for_user}" + ${CLICKHOUSE_CLIENT} --max_block_size 1 --query_id $query_id --user $USER --function_sleep_max_microseconds_per_block=120000000 -q "SELECT sleepEachRow(0.1) FROM numbers(1200)" &>/dev/null & + wait_for_query_to_start $query_id done - for ((i = 1 ; i <= ${sessions_count} ; i++)); do - local query_id="${QUERY_ID_PREFIX}_${i}" - $CLICKHOUSE_CLIENT --query "KILL QUERY WHERE query_id='$query_id' SYNC" >/dev/null + # Step 2: Run another `max_sessions_for_user` + 1 query. That query should fail. + local query_id="${QUERY_ID_PREFIX}_should_fail" + ${CLICKHOUSE_CLIENT} --query_id $query_id --user $USER -q "SELECT 1" 2>&1 | grep -o -m 1 'USER_SESSION_LIMIT_EXCEEDED' + + # Step 3: Stop running queries launched at step 1. + for ((i = 1 ; i <= max_sessions_for_user ; i++)); do + local query_id="${QUERY_ID_PREFIX}_${i}_${max_sessions_for_user}" + $CLICKHOUSE_CLIENT --query "KILL QUERY WHERE query_id='$query_id' ASYNC" >/dev/null done wait } -function test_alter_profile() -{ - local max_session_count="$1" - local alter_sessions_count="$2" - echo $"test_alter_profile case: max_session_count ${max_session_count} alter_sessions_count ${alter_sessions_count}" - - ${CLICKHOUSE_CLIENT} -q $"ALTER SETTINGS PROFILE ${PROFILE} SETTINGS max_sessions_for_user = ${max_session_count}" - - # Create sessions with $max_session_count restriction - run_sessions_set $max_session_count 0 - - # Update restriction to $alter_sessions_count - ${CLICKHOUSE_CLIENT} -q $"ALTER SETTINGS PROFILE ${PROFILE} SETTINGS max_sessions_for_user = ${alter_sessions_count}" - - # Simultaneous sessions should use max settings from profile ($alter_sessions_count) - run_sessions_set $max_session_count 1 -} - -test_alter_profile 1 1 -test_alter_profile 2 1 -test_alter_profile 1 2 -test_alter_profile 2 2 +test_alter_max_sessions_for_user 1 +test_alter_max_sessions_for_user 2 ${CLICKHOUSE_CLIENT} -q "SELECT 1 SETTINGS max_sessions_for_user = 1" 2>&1 | grep -m 1 -o 'READONLY' | head -1 ${CLICKHOUSE_CLIENT} -q $"SET max_sessions_for_user = 1 " 2>&1 | grep -o -m 1 'READONLY' | head -1 From 7a53a14940ae1be299305548f0d024de7f279fe3 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 30 Jul 2024 09:29:16 +0200 Subject: [PATCH 441/661] Update 03213_deep_json.sql --- tests/queries/0_stateless/03213_deep_json.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/03213_deep_json.sql b/tests/queries/0_stateless/03213_deep_json.sql index 4f79d99eb26..2a9476381ff 100644 --- a/tests/queries/0_stateless/03213_deep_json.sql +++ 
b/tests/queries/0_stateless/03213_deep_json.sql @@ -1,5 +1,5 @@ -- The default limit works. -SELECT * FROM format("JSONCompactEachRow", 'x UInt32, y UInt32', REPEAT('[1,1,', 100000)) SETTINGS input_format_json_compact_allow_variable_number_of_columns = 1; -- { serverError TOO_DEEP_RECURSION } +SELECT * FROM format("JSONCompactEachRow", 'x UInt32, y UInt32', REPEAT('[1,1,', 100000)) SETTINGS input_format_json_compact_allow_variable_number_of_columns = 1; -- { serverError TOO_DEEP_RECURSION, INCORRECT_DATA } -- Even if we relax the limit, it is also safe. SET input_format_json_max_depth = 100000; -SELECT * FROM format("JSONCompactEachRow", 'x UInt32, y UInt32', REPEAT('[1,1,', 100000)) SETTINGS input_format_json_compact_allow_variable_number_of_columns = 1; -- { serverError TOO_DEEP_RECURSION } +SELECT * FROM format("JSONCompactEachRow", 'x UInt32, y UInt32', REPEAT('[1,1,', 100000)) SETTINGS input_format_json_compact_allow_variable_number_of_columns = 1; -- { serverError TOO_DEEP_RECURSION, INCORRECT_DATA } From 6ab67323862391e520ac571f609f52b582248da5 Mon Sep 17 00:00:00 2001 From: Michael Kolupaev Date: Tue, 30 Jul 2024 07:36:38 +0000 Subject: [PATCH 442/661] Delete .reference --- ...2_refreshable_materialized_views.reference | 44 ------------------- 1 file changed, 44 deletions(-) delete mode 100644 tests/queries/0_stateless/02932_refreshable_materialized_views.reference diff --git a/tests/queries/0_stateless/02932_refreshable_materialized_views.reference b/tests/queries/0_stateless/02932_refreshable_materialized_views.reference deleted file mode 100644 index 2eb41590af1..00000000000 --- a/tests/queries/0_stateless/02932_refreshable_materialized_views.reference +++ /dev/null @@ -1,44 +0,0 @@ -<1: created view> a [] 1 -CREATE MATERIALIZED VIEW default.a\nREFRESH AFTER 2 SECOND\n(\n `x` UInt64\n)\nENGINE = Memory\nAS SELECT number AS x\nFROM numbers(2)\nUNION ALL\nSELECT rand64() AS x -<2: refreshed> 3 1 1 -<3: time difference at least> 1000 -<4: next refresh in> 2 -<4.5: altered> Scheduled Finished 2052-01-01 00:00:00 -CREATE MATERIALIZED VIEW default.a\nREFRESH EVERY 2 YEAR\n(\n `x` UInt64\n)\nENGINE = Memory\nAS SELECT x * 2 AS x\nFROM default.src -<5: no refresh> 3 -<6: refreshed> 2 -<7: refreshed> Scheduled Finished 2054-01-01 00:00:00 -CREATE MATERIALIZED VIEW default.b\nREFRESH EVERY 2 YEAR DEPENDS ON default.a\n(\n `y` Int32\n)\nENGINE = MergeTree\nORDER BY y\nSETTINGS index_granularity = 8192\nAS SELECT x * 10 AS y\nFROM default.a -<8: refreshed> 20 -<9: refreshed> a Scheduled Finished 2054-01-01 00:00:00 -<9: refreshed> b Scheduled Finished 2054-01-01 00:00:00 -<10: waiting> a Scheduled [] 2054-01-01 00:00:00 -<10: waiting> b WaitingForDependencies ['default.a'] 2054-01-01 00:00:00 -<11: chain-refreshed a> 4 -<12: chain-refreshed b> 40 -<13: chain-refreshed> a Scheduled [] Finished 2054-01-01 00:00:01 2056-01-01 00:00:00 -<13: chain-refreshed> b Scheduled ['default.a'] Finished 2054-01-24 23:22:21 2056-01-01 00:00:00 -<14: waiting for next cycle> a Scheduled [] 2058-01-01 00:00:00 -<14: waiting for next cycle> b WaitingForDependencies ['default.a'] 2060-01-01 00:00:00 -<15: chain-refreshed a> 6 -<16: chain-refreshed b> 60 -<17: chain-refreshed> a Scheduled 2062-01-01 00:00:00 -<17: chain-refreshed> b Scheduled 2062-01-01 00:00:00 -<18: removed dependency> b Scheduled [] 2062-03-03 03:03:03 2064-01-01 00:00:00 5 -CREATE MATERIALIZED VIEW default.b\nREFRESH EVERY 2 YEAR\n(\n `y` Int32\n)\nENGINE = MergeTree\nORDER BY y\nSETTINGS index_granularity = 8192\nAS SELECT x * 10 AS y\nFROM 
default.a -<19: exception> 1 -<20: unexception> 1 -<21: rename> 1 -<22: rename> d Finished -<23: simple refresh> 1 -<24: rename during refresh> 1 -<25: rename during refresh> f Running -<27: cancelled> f Scheduled -CREATE MATERIALIZED VIEW default.g\nREFRESH EVERY 1 WEEK OFFSET 3 DAY 4 HOUR RANDOMIZE FOR 4 DAY 1 HOUR\n(\n `x` Int64\n)\nENGINE = Memory\nAS SELECT 42 -<29: randomize> 1 1 -CREATE MATERIALIZED VIEW default.h\nREFRESH EVERY 1 SECOND TO default.dest\n(\n `x` Int64\n)\nAS SELECT x * 10 AS x\nFROM default.src -<30: to existing table> 10 -<31: to existing table> 10 -<31: to existing table> 20 -<32: empty> i Scheduled Unknown -<32: empty> j Scheduled Finished From 861bdb51f8e098c4b06cb14988e00febbb5a0ac7 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Tue, 30 Jul 2024 09:42:38 +0200 Subject: [PATCH 443/661] Fix test --- .../test_replicated_table_attach/test.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/tests/integration/test_replicated_table_attach/test.py b/tests/integration/test_replicated_table_attach/test.py index 02ef1ad6353..499220def2c 100644 --- a/tests/integration/test_replicated_table_attach/test.py +++ b/tests/integration/test_replicated_table_attach/test.py @@ -27,7 +27,20 @@ def started_cluster(): cluster.shutdown() +def start_clean_clickhouse(): + # remove fault injection if present + if "fault_injection.xml" in node.exec_in_container( + ["bash", "-c", "ls /etc/clickhouse-server/config.d"] + ): + print("Removing fault injection") + node.exec_in_container( + ["bash", "-c", "rm /etc/clickhouse-server/config.d/fault_injection.xml"] + ) + node.restart_clickhouse() + + def test_startup_with_small_bg_pool(started_cluster): + start_clean_clickhouse() node.query( "CREATE TABLE replicated_table (k UInt64, i32 Int32) ENGINE=ReplicatedMergeTree('/clickhouse/replicated_table', 'r1') ORDER BY k" ) @@ -45,6 +58,7 @@ def test_startup_with_small_bg_pool(started_cluster): def test_startup_with_small_bg_pool_partitioned(started_cluster): + start_clean_clickhouse() node.query( "CREATE TABLE replicated_table_partitioned (k UInt64, i32 Int32) ENGINE=ReplicatedMergeTree('/clickhouse/replicated_table_partitioned', 'r1') ORDER BY k" ) From a70571762f7d73a7ecc94981e8086418ecfdeb3b Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 30 Jul 2024 10:08:35 +0200 Subject: [PATCH 444/661] Enable text_log by default --- programs/server/config.xml | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/programs/server/config.xml b/programs/server/config.xml index 94825a55f67..844aff8f668 100644 --- a/programs/server/config.xml +++ b/programs/server/config.xml @@ -1130,8 +1130,7 @@ 7500 - + system part_log
@@ -1143,9 +1142,9 @@ false
- system text_log
@@ -1154,9 +1153,8 @@ 8192 524288 false - + trace
- --> From c427c4e2bba852f6f8f9b9346a9a2d0a09f0e4be Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Tue, 30 Jul 2024 10:34:47 +0200 Subject: [PATCH 445/661] Typo --- src/Interpreters/DatabaseCatalog.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/DatabaseCatalog.cpp b/src/Interpreters/DatabaseCatalog.cpp index 98526e5c1cd..a8e5fd7e6aa 100644 --- a/src/Interpreters/DatabaseCatalog.cpp +++ b/src/Interpreters/DatabaseCatalog.cpp @@ -1355,7 +1355,7 @@ void DatabaseCatalog::dropTableDataTask() } catch (...) { - /// We don't re-throw expection, because we are in a background pool. + /// We don't re-throw exception, because we are in a background pool. tryLogCurrentException(log, "Cannot drop tables. Will retry later."); } } From 3e6a1b99e023eb3d592c72c17ae4913a9074b5af Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Tue, 30 Jul 2024 11:05:45 +0200 Subject: [PATCH 446/661] Fix file/URI parsing with archive syntax --- src/IO/Archives/ArchiveUtils.cpp | 50 +++++++++++++++++++ src/IO/Archives/ArchiveUtils.h | 14 ++++++ src/IO/Archives/createArchiveReader.cpp | 13 ++--- src/IO/Archives/createArchiveWriter.cpp | 9 ++-- src/IO/S3/URI.cpp | 39 +++++++-------- src/IO/S3/URI.h | 3 +- src/Storages/StorageFile.cpp | 8 ++- src/TableFunctions/TableFunctionFile.cpp | 9 ++-- .../03214_parsing_archive_name_file.reference | 12 +++++ .../03214_parsing_archive_name_file.sh | 21 ++++++++ .../03215_parsing_archive_name_s3.reference | 2 + .../03215_parsing_archive_name_s3.sql | 6 +++ .../data_minio/::03215_archive.csv | 1 + .../data_minio/test :: 03215_archive.csv | 1 + .../data_minio/test::03215_archive.csv | 1 + 15 files changed, 145 insertions(+), 44 deletions(-) create mode 100644 src/IO/Archives/ArchiveUtils.cpp create mode 100644 tests/queries/0_stateless/03214_parsing_archive_name_file.reference create mode 100755 tests/queries/0_stateless/03214_parsing_archive_name_file.sh create mode 100644 tests/queries/0_stateless/03215_parsing_archive_name_s3.reference create mode 100644 tests/queries/0_stateless/03215_parsing_archive_name_s3.sql create mode 100644 tests/queries/0_stateless/data_minio/::03215_archive.csv create mode 100644 tests/queries/0_stateless/data_minio/test :: 03215_archive.csv create mode 100644 tests/queries/0_stateless/data_minio/test::03215_archive.csv diff --git a/src/IO/Archives/ArchiveUtils.cpp b/src/IO/Archives/ArchiveUtils.cpp new file mode 100644 index 00000000000..50009087de3 --- /dev/null +++ b/src/IO/Archives/ArchiveUtils.cpp @@ -0,0 +1,50 @@ +#include + +#include +#include + +namespace DB +{ + +namespace +{ + +using namespace std::literals; +constexpr std::array tar_extensions{".tar"sv, ".tar.gz"sv, ".tgz"sv, ".tar.zst"sv, ".tzst"sv, ".tar.xz"sv, ".tar.bz2"sv, ".tar.lzma"sv}; +constexpr std::array zip_extensions{".zip"sv, ".zipx"sv}; +constexpr std::array sevenz_extensiosns{".7z"sv}; + +bool hasSupportedExtension(std::string_view path, const auto & supported_extensions) +{ + for (auto supported_extension : supported_extensions) + { + if (path.ends_with(supported_extension)) + return true; + } + + return false; +} + +} + +bool hasSupportedTarExtension(std::string_view path) +{ + return hasSupportedExtension(path, tar_extensions); +} + +bool hasSupportedZipExtension(std::string_view path) +{ + return hasSupportedExtension(path, zip_extensions); +} + +bool hasSupported7zExtension(std::string_view path) +{ + return hasSupportedExtension(path, sevenz_extensiosns); +} + +bool hasSupportedArchiveExtension(std::string_view path) +{ + return 
hasSupportedTarExtension(path) || hasSupportedZipExtension(path) || hasSupported7zExtension(path); +} + +} diff --git a/src/IO/Archives/ArchiveUtils.h b/src/IO/Archives/ArchiveUtils.h index 1b66be005a2..cdb731d1d57 100644 --- a/src/IO/Archives/ArchiveUtils.h +++ b/src/IO/Archives/ArchiveUtils.h @@ -10,3 +10,17 @@ #include #include #endif + +#include + +namespace DB +{ + +bool hasSupportedTarExtension(std::string_view path); +bool hasSupportedZipExtension(std::string_view path); +bool hasSupported7zExtension(std::string_view path); + +bool hasSupportedArchiveExtension(std::string_view path); + + +} diff --git a/src/IO/Archives/createArchiveReader.cpp b/src/IO/Archives/createArchiveReader.cpp index 782602091ac..dfa098eede0 100644 --- a/src/IO/Archives/createArchiveReader.cpp +++ b/src/IO/Archives/createArchiveReader.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #include @@ -12,7 +13,6 @@ extern const int CANNOT_UNPACK_ARCHIVE; extern const int SUPPORT_IS_DISABLED; } - std::shared_ptr createArchiveReader(const String & path_to_archive) { return createArchiveReader(path_to_archive, {}, 0); @@ -24,11 +24,7 @@ std::shared_ptr createArchiveReader( [[maybe_unused]] const std::function()> & archive_read_function, [[maybe_unused]] size_t archive_size) { - using namespace std::literals; - static constexpr std::array tar_extensions{ - ".tar"sv, ".tar.gz"sv, ".tgz"sv, ".tar.zst"sv, ".tzst"sv, ".tar.xz"sv, ".tar.bz2"sv, ".tar.lzma"sv}; - - if (path_to_archive.ends_with(".zip") || path_to_archive.ends_with(".zipx")) + if (hasSupportedZipExtension(path_to_archive)) { #if USE_MINIZIP return std::make_shared(path_to_archive, archive_read_function, archive_size); @@ -36,8 +32,7 @@ std::shared_ptr createArchiveReader( throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "minizip library is disabled"); #endif } - else if (std::any_of( - tar_extensions.begin(), tar_extensions.end(), [&](const auto extension) { return path_to_archive.ends_with(extension); })) + else if (hasSupportedTarExtension(path_to_archive)) { #if USE_LIBARCHIVE return std::make_shared(path_to_archive, archive_read_function); @@ -45,7 +40,7 @@ std::shared_ptr createArchiveReader( throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "libarchive library is disabled"); #endif } - else if (path_to_archive.ends_with(".7z")) + else if (hasSupported7zExtension(path_to_archive)) { #if USE_LIBARCHIVE return std::make_shared(path_to_archive); diff --git a/src/IO/Archives/createArchiveWriter.cpp b/src/IO/Archives/createArchiveWriter.cpp index 9a169587088..53be0a85a10 100644 --- a/src/IO/Archives/createArchiveWriter.cpp +++ b/src/IO/Archives/createArchiveWriter.cpp @@ -1,3 +1,4 @@ +#include #include #include #include @@ -24,10 +25,7 @@ std::shared_ptr createArchiveWriter(const String & path_to_archi std::shared_ptr createArchiveWriter(const String & path_to_archive, [[maybe_unused]] std::unique_ptr archive_write_buffer) { - using namespace std::literals; - static constexpr std::array tar_extensions{ - ".tar"sv, ".tar.gz"sv, ".tgz"sv, ".tar.bz2"sv, ".tar.lzma"sv, ".tar.zst"sv, ".tzst"sv, ".tar.xz"sv}; - if (path_to_archive.ends_with(".zip") || path_to_archive.ends_with(".zipx")) + if (hasSupportedZipExtension(path_to_archive)) { #if USE_MINIZIP return std::make_shared(path_to_archive, std::move(archive_write_buffer)); @@ -35,8 +33,7 @@ createArchiveWriter(const String & path_to_archive, [[maybe_unused]] std::unique throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "minizip library is disabled"); #endif } - else if (std::any_of( - 
tar_extensions.begin(), tar_extensions.end(), [&](const auto extension) { return path_to_archive.ends_with(extension); })) + else if (hasSupportedTarExtension(path_to_archive)) { #if USE_LIBARCHIVE return std::make_shared(path_to_archive, std::move(archive_write_buffer)); diff --git a/src/IO/S3/URI.cpp b/src/IO/S3/URI.cpp index 4bf7a3ddf86..b9c400d2b98 100644 --- a/src/IO/S3/URI.cpp +++ b/src/IO/S3/URI.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include @@ -54,10 +55,7 @@ URI::URI(const std::string & uri_) static constexpr auto OSS = "OSS"; static constexpr auto EOS = "EOS"; - if (containsArchive(uri_)) - std::tie(uri_str, archive_pattern) = getPathToArchiveAndArchivePattern(uri_); - else - uri_str = uri_; + std::tie(uri_str, archive_pattern) = getURIAndArchivePattern(uri_); uri = Poco::URI(uri_str); std::unordered_map mapper; @@ -167,32 +165,29 @@ void URI::validateBucket(const String & bucket, const Poco::URI & uri) !uri.empty() ? " (" + uri.toString() + ")" : ""); } -bool URI::containsArchive(const std::string & source) +std::pair> URI::getURIAndArchivePattern(const std::string & source) { size_t pos = source.find("::"); - return (pos != std::string::npos); -} + if (pos == String::npos) + return {source, std::nullopt}; -std::pair URI::getPathToArchiveAndArchivePattern(const std::string & source) -{ - size_t pos = source.find("::"); - assert(pos != std::string::npos); + std::string_view path_to_archive_view = std::string_view{source}.substr(0, pos); + while (path_to_archive_view.ends_with(' ')) + path_to_archive_view.remove_suffix(1); - std::string path_to_archive = source.substr(0, pos); - while ((!path_to_archive.empty()) && path_to_archive.ends_with(' ')) - path_to_archive.pop_back(); + if (path_to_archive_view.empty() || !hasSupportedArchiveExtension(path_to_archive_view)) + return {source, std::nullopt}; - if (path_to_archive.empty()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Path to archive is empty"); + auto archive_uri = path_to_archive_view; - std::string_view path_in_archive_view = std::string_view{source}.substr(pos + 2); - while (path_in_archive_view.front() == ' ') - path_in_archive_view.remove_prefix(1); + std::string_view archive_pattern_view = std::string_view{source}.substr(pos + 2); + while (archive_pattern_view.front() == ' ') + archive_pattern_view.remove_prefix(1); - if (path_in_archive_view.empty()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Filename is empty"); + if (archive_pattern_view.empty()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Archive pattern is empty"); - return {path_to_archive, std::string{path_in_archive_view}}; + return std::pair{std::string{archive_uri}, std::string{archive_pattern_view}}; } } diff --git a/src/IO/S3/URI.h b/src/IO/S3/URI.h index 363f98c46f5..e4bb0d9eae1 100644 --- a/src/IO/S3/URI.h +++ b/src/IO/S3/URI.h @@ -42,8 +42,7 @@ struct URI static void validateBucket(const std::string & bucket, const Poco::URI & uri); private: - bool containsArchive(const std::string & source); - std::pair getPathToArchiveAndArchivePattern(const std::string & source); + std::pair> getURIAndArchivePattern(const std::string & source); }; } diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 98cd5c4dfa9..de56fcf66a0 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -2247,8 +2248,11 @@ void StorageFile::parseFileSource(String source, String & filename, String & pat while 
(path_to_archive_view.ends_with(' ')) path_to_archive_view.remove_suffix(1); - if (path_to_archive_view.empty()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Path to archive is empty"); + if (path_to_archive_view.empty() || !hasSupportedArchiveExtension(path_to_archive_view)) + { + filename = std::move(source); + return; + } path_to_archive = path_to_archive_view; diff --git a/src/TableFunctions/TableFunctionFile.cpp b/src/TableFunctions/TableFunctionFile.cpp index 1b6d86f8fa5..12b88ae2b14 100644 --- a/src/TableFunctions/TableFunctionFile.cpp +++ b/src/TableFunctions/TableFunctionFile.cpp @@ -63,9 +63,12 @@ std::optional TableFunctionFile::tryGetFormatFromFirstArgument() return FormatFactory::instance().tryGetFormatFromFileName(filename); } -StoragePtr TableFunctionFile::getStorage(const String & source, - const String & format_, const ColumnsDescription & columns, - ContextPtr global_context, const std::string & table_name, +StoragePtr TableFunctionFile::getStorage( + const String & source, + const String & format_, + const ColumnsDescription & columns, + ContextPtr global_context, + const std::string & table_name, const std::string & compression_method_) const { // For `file` table function, we are going to use format settings from the diff --git a/tests/queries/0_stateless/03214_parsing_archive_name_file.reference b/tests/queries/0_stateless/03214_parsing_archive_name_file.reference new file mode 100644 index 00000000000..243a7c8fd02 --- /dev/null +++ b/tests/queries/0_stateless/03214_parsing_archive_name_file.reference @@ -0,0 +1,12 @@ +::nonexistentfile.csv +1 +nonexistent::nonexistentfile.csv +1 +nonexistent :: nonexistentfile.csv +1 +nonexistent ::nonexistentfile.csv +1 +nonexistent.tar.gz :: nonexistentfile.csv +1 +nonexistent.zip:: nonexistentfile.csv +1 diff --git a/tests/queries/0_stateless/03214_parsing_archive_name_file.sh b/tests/queries/0_stateless/03214_parsing_archive_name_file.sh new file mode 100755 index 00000000000..32bf3246c84 --- /dev/null +++ b/tests/queries/0_stateless/03214_parsing_archive_name_file.sh @@ -0,0 +1,21 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +function try_to_read_file() +{ + file_to_read=$1 + file_argument=$2 + + echo $file_argument + $CLICKHOUSE_LOCAL -q "SELECT * FROM file('$file_argument')" 2>&1 | rg -c "Cannot stat file.*$file_to_read" +} + +try_to_read_file "::nonexistentfile.csv" "::nonexistentfile.csv" +try_to_read_file "nonexistent::nonexistentfile.csv" "nonexistent::nonexistentfile.csv" +try_to_read_file "nonexistent :: nonexistentfile.csv" "nonexistent :: nonexistentfile.csv" +try_to_read_file "nonexistent ::nonexistentfile.csv" "nonexistent ::nonexistentfile.csv" +try_to_read_file "nonexistent.tar.gz" "nonexistent.tar.gz :: nonexistentfile.csv" +try_to_read_file "nonexistent.zip" "nonexistent.zip:: nonexistentfile.csv" diff --git a/tests/queries/0_stateless/03215_parsing_archive_name_s3.reference b/tests/queries/0_stateless/03215_parsing_archive_name_s3.reference new file mode 100644 index 00000000000..9dd925a7480 --- /dev/null +++ b/tests/queries/0_stateless/03215_parsing_archive_name_s3.reference @@ -0,0 +1,2 @@ +::03215_archive.csv test/::03215_archive.csv +test::03215_archive.csv test/test::03215_archive.csv diff --git a/tests/queries/0_stateless/03215_parsing_archive_name_s3.sql b/tests/queries/0_stateless/03215_parsing_archive_name_s3.sql new file mode 100644 index 00000000000..9d01f53c838 --- /dev/null +++ b/tests/queries/0_stateless/03215_parsing_archive_name_s3.sql @@ -0,0 +1,6 @@ +-- Tags: no-fasttest +-- Tag no-fasttest: Depends on AWS + +SELECT _file, _path FROM s3(s3_conn, filename='::03215_archive.csv') ORDER BY (_file, _path); +SELECT _file, _path FROM s3(s3_conn, filename='test :: 03215_archive.csv') ORDER BY (_file, _path); -- { serverError STD_EXCEPTION } +SELECT _file, _path FROM s3(s3_conn, filename='test::03215_archive.csv') ORDER BY (_file, _path); diff --git a/tests/queries/0_stateless/data_minio/::03215_archive.csv b/tests/queries/0_stateless/data_minio/::03215_archive.csv new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/0_stateless/data_minio/::03215_archive.csv @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/data_minio/test :: 03215_archive.csv b/tests/queries/0_stateless/data_minio/test :: 03215_archive.csv new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/0_stateless/data_minio/test :: 03215_archive.csv @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/data_minio/test::03215_archive.csv b/tests/queries/0_stateless/data_minio/test::03215_archive.csv new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/0_stateless/data_minio/test::03215_archive.csv @@ -0,0 +1 @@ +1 From 5381619b2ab465386f11e86242883419e48e5f6b Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 30 Jul 2024 11:36:25 +0200 Subject: [PATCH 447/661] Remove bad feature. --- CHANGELOG.md | 1 - 1 file changed, 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 722ae4f8268..a4c873ba3f9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -65,7 +65,6 @@ * The setting `optimize_trivial_insert_select` is disabled by default. In most cases, it should be beneficial. Nevertheless, if you are seeing slower INSERT SELECT or increased memory usage, you can enable it back or `SET compatibility = '24.6'`. [#58970](https://github.com/ClickHouse/ClickHouse/pull/58970) ([Alexey Milovidov](https://github.com/alexey-milovidov)). * Print stacktrace and diagnostic info if `clickhouse-client` or `clickhouse-local` crashes. 
[#61109](https://github.com/ClickHouse/ClickHouse/pull/61109) ([Alexander Tokmakov](https://github.com/tavplubix)). * The result of `SHOW INDEX | INDEXES | INDICES | KEYS` was previously sorted by the primary key column names. Since this was unintuitive, the result is now sorted by the position of the primary key columns within the primary key. [#61131](https://github.com/ClickHouse/ClickHouse/pull/61131) ([Robert Schulze](https://github.com/rschu1ze)). -* Allow matching column names in a case-insensitive manner when reading json files (`input_format_json_case_insensitive_column_matching`). [#61750](https://github.com/ClickHouse/ClickHouse/pull/61750) ([kevinyhzou](https://github.com/KevinyhZou)). * Support reading partitioned data DeltaLake data. Infer DeltaLake schema by reading metadata instead of data. [#63201](https://github.com/ClickHouse/ClickHouse/pull/63201) ([Kseniia Sumarokova](https://github.com/kssenii)). * In composable protocols TLS layer accepted only `certificateFile` and `privateKeyFile` parameters. https://clickhouse.com/docs/en/operations/settings/composable-protocols. [#63985](https://github.com/ClickHouse/ClickHouse/pull/63985) ([Anton Ivashkin](https://github.com/ianton-ru)). * Added profile event `SelectQueriesWithPrimaryKeyUsage` which indicates how many SELECT queries use the primary key to evaluate the WHERE clause. [#64492](https://github.com/ClickHouse/ClickHouse/pull/64492) ([0x01f](https://github.com/0xfei)). From 4d4fc8fd6f0123613305423d861429f54222d23f Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Tue, 30 Jul 2024 11:57:37 +0200 Subject: [PATCH 448/661] Add setting to disable archive path syntax --- src/Core/Settings.h | 1 + src/Core/SettingsChangesHistory.cpp | 1 + src/IO/S3/URI.cpp | 8 ++++++-- src/IO/S3/URI.h | 2 +- src/Storages/ObjectStorage/S3/Configuration.cpp | 8 ++++---- src/Storages/StorageFile.cpp | 14 ++++++++++++-- src/Storages/StorageFile.h | 2 +- src/TableFunctions/TableFunctionFile.cpp | 5 +++-- .../03214_parsing_archive_name_file.reference | 4 ++++ .../0_stateless/03214_parsing_archive_name_file.sh | 10 ++++++++-- .../03215_parsing_archive_name_s3.reference | 1 + .../0_stateless/03215_parsing_archive_name_s3.sql | 1 + .../data_minio/test.zip::03215_archive.csv | 1 + 13 files changed, 44 insertions(+), 14 deletions(-) create mode 100644 tests/queries/0_stateless/data_minio/test.zip::03215_archive.csv diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 4fc2034b855..5114a8204cd 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -936,6 +936,7 @@ class IColumn; M(UInt64, parallel_replicas_min_number_of_rows_per_replica, 0, "Limit the number of replicas used in a query to (estimated rows to read / min_number_of_rows_per_replica). The max is still limited by 'max_parallel_replicas'", 0) \ M(Bool, parallel_replicas_prefer_local_join, true, "If true, and JOIN can be executed with parallel replicas algorithm, and all storages of right JOIN part are *MergeTree, local JOIN will be used instead of GLOBAL JOIN.", 0) \ M(UInt64, parallel_replicas_mark_segment_size, 128, "Parts virtually divided into segments to be distributed between replicas for parallel reading. This setting controls the size of these segments. 
Not recommended to change until you're absolutely sure in what you're doing", 0) \ + M(Bool, allow_archive_path_syntax, true, "File/S3 engines/table function will parse paths with '::' as ' :: ' if archive has correct extension", 0) \ \ M(Bool, allow_experimental_inverted_index, false, "If it is set to true, allow to use experimental inverted index.", 0) \ M(Bool, allow_experimental_full_text_index, false, "If it is set to true, allow to use experimental full-text index.", 0) \ diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index 9faf77e9087..8483a267237 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -124,6 +124,7 @@ static std::initializer_list mapper; diff --git a/src/IO/S3/URI.h b/src/IO/S3/URI.h index e4bb0d9eae1..80e2da96cd4 100644 --- a/src/IO/S3/URI.h +++ b/src/IO/S3/URI.h @@ -36,7 +36,7 @@ struct URI bool is_virtual_hosted_style; URI() = default; - explicit URI(const std::string & uri_); + explicit URI(const std::string & uri_, bool allow_archive_path_syntax = false); void addRegionToURI(const std::string & region); static void validateBucket(const std::string & bucket, const Poco::URI & uri); diff --git a/src/Storages/ObjectStorage/S3/Configuration.cpp b/src/Storages/ObjectStorage/S3/Configuration.cpp index 094ca069e7a..7542f59dcc4 100644 --- a/src/Storages/ObjectStorage/S3/Configuration.cpp +++ b/src/Storages/ObjectStorage/S3/Configuration.cpp @@ -142,14 +142,14 @@ ObjectStoragePtr StorageS3Configuration::createObjectStorage(ContextPtr context, void StorageS3Configuration::fromNamedCollection(const NamedCollection & collection, ContextPtr context) { - const auto settings = context->getSettingsRef(); + const auto & settings = context->getSettingsRef(); validateNamedCollection(collection, required_configuration_keys, optional_configuration_keys); auto filename = collection.getOrDefault("filename", ""); if (!filename.empty()) - url = S3::URI(std::filesystem::path(collection.get("url")) / filename); + url = S3::URI(std::filesystem::path(collection.get("url")) / filename, settings.allow_archive_path_syntax); else - url = S3::URI(collection.get("url")); + url = S3::URI(collection.get("url"), settings.allow_archive_path_syntax); auth_settings.access_key_id = collection.getOrDefault("access_key_id", ""); auth_settings.secret_access_key = collection.getOrDefault("secret_access_key", ""); @@ -330,7 +330,7 @@ void StorageS3Configuration::fromAST(ASTs & args, ContextPtr context, bool with_ } /// This argument is always the first - url = S3::URI(checkAndGetLiteralArgument(args[0], "url")); + url = S3::URI(checkAndGetLiteralArgument(args[0], "url"), context->getSettingsRef().allow_archive_path_syntax); if (engine_args_to_idx.contains("format")) { diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index de56fcf66a0..efb39f90053 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -2208,7 +2208,11 @@ void registerStorageFile(StorageFactory & factory) else if (type == Field::Types::UInt64) source_fd = static_cast(literal->value.get()); else if (type == Field::Types::String) - StorageFile::parseFileSource(literal->value.get(), source_path, storage_args.path_to_archive); + StorageFile::parseFileSource( + literal->value.get(), + source_path, + storage_args.path_to_archive, + factory_args.getLocalContext()->getSettingsRef().allow_archive_path_syntax); else throw Exception(ErrorCodes::BAD_ARGUMENTS, "Second argument must be path or file descriptor"); } @@ -2235,8 
+2239,14 @@ SchemaCache & StorageFile::getSchemaCache(const ContextPtr & context) return schema_cache; } -void StorageFile::parseFileSource(String source, String & filename, String & path_to_archive) +void StorageFile::parseFileSource(String source, String & filename, String & path_to_archive, bool allow_archive_path_syntax) { + if (!allow_archive_path_syntax) + { + filename = std::move(source); + return; + } + size_t pos = source.find("::"); if (pos == String::npos) { diff --git a/src/Storages/StorageFile.h b/src/Storages/StorageFile.h index 895a8a663b8..bb969c1877c 100644 --- a/src/Storages/StorageFile.h +++ b/src/Storages/StorageFile.h @@ -128,7 +128,7 @@ public: static SchemaCache & getSchemaCache(const ContextPtr & context); - static void parseFileSource(String source, String & filename, String & path_to_archive); + static void parseFileSource(String source, String & filename, String & path_to_archive, bool allow_archive_path_syntax); static ArchiveInfo getArchiveInfo( const std::string & path_to_archive, diff --git a/src/TableFunctions/TableFunctionFile.cpp b/src/TableFunctions/TableFunctionFile.cpp index 12b88ae2b14..af327cfe54e 100644 --- a/src/TableFunctions/TableFunctionFile.cpp +++ b/src/TableFunctions/TableFunctionFile.cpp @@ -26,7 +26,7 @@ void TableFunctionFile::parseFirstArguments(const ASTPtr & arg, const ContextPtr if (context->getApplicationType() != Context::ApplicationType::LOCAL) { ITableFunctionFileLike::parseFirstArguments(arg, context); - StorageFile::parseFileSource(std::move(filename), filename, path_to_archive); + StorageFile::parseFileSource(std::move(filename), filename, path_to_archive, context->getSettingsRef().allow_archive_path_syntax); return; } @@ -42,7 +42,8 @@ void TableFunctionFile::parseFirstArguments(const ASTPtr & arg, const ContextPtr else if (filename == "stderr") fd = STDERR_FILENO; else - StorageFile::parseFileSource(std::move(filename), filename, path_to_archive); + StorageFile::parseFileSource( + std::move(filename), filename, path_to_archive, context->getSettingsRef().allow_archive_path_syntax); } else if (type == Field::Types::Int64 || type == Field::Types::UInt64) { diff --git a/tests/queries/0_stateless/03214_parsing_archive_name_file.reference b/tests/queries/0_stateless/03214_parsing_archive_name_file.reference index 243a7c8fd02..d793d26dfc3 100644 --- a/tests/queries/0_stateless/03214_parsing_archive_name_file.reference +++ b/tests/queries/0_stateless/03214_parsing_archive_name_file.reference @@ -10,3 +10,7 @@ nonexistent.tar.gz :: nonexistentfile.csv 1 nonexistent.zip:: nonexistentfile.csv 1 +nonexistent.tar.gz :: nonexistentfile.csv SETTINGS allow_archive_path_syntax=0 +1 +nonexistent.zip:: nonexistentfile.csv SETTINGS allow_archive_path_syntax=0 +1 diff --git a/tests/queries/0_stateless/03214_parsing_archive_name_file.sh b/tests/queries/0_stateless/03214_parsing_archive_name_file.sh index 32bf3246c84..2f77627f6be 100755 --- a/tests/queries/0_stateless/03214_parsing_archive_name_file.sh +++ b/tests/queries/0_stateless/03214_parsing_archive_name_file.sh @@ -8,14 +8,20 @@ function try_to_read_file() { file_to_read=$1 file_argument=$2 + settings=$3 - echo $file_argument - $CLICKHOUSE_LOCAL -q "SELECT * FROM file('$file_argument')" 2>&1 | rg -c "Cannot stat file.*$file_to_read" + echo $file_argument $settings + $CLICKHOUSE_LOCAL -q "SELECT * FROM file('$file_argument') $settings" 2>&1 | rg -c "Cannot stat file.*$file_to_read" } +# if archive extension is not detected for part before '::', path is taken as is try_to_read_file 
"::nonexistentfile.csv" "::nonexistentfile.csv" try_to_read_file "nonexistent::nonexistentfile.csv" "nonexistent::nonexistentfile.csv" try_to_read_file "nonexistent :: nonexistentfile.csv" "nonexistent :: nonexistentfile.csv" try_to_read_file "nonexistent ::nonexistentfile.csv" "nonexistent ::nonexistentfile.csv" +# if archive extension is detected for part before '::', path is split into archive and filename try_to_read_file "nonexistent.tar.gz" "nonexistent.tar.gz :: nonexistentfile.csv" try_to_read_file "nonexistent.zip" "nonexistent.zip:: nonexistentfile.csv" +# disabling archive syntax will always parse path as is +try_to_read_file "nonexistent.tar.gz :: nonexistentfile.csv" "nonexistent.tar.gz :: nonexistentfile.csv" "SETTINGS allow_archive_path_syntax=0" +try_to_read_file "nonexistent.zip:: nonexistentfile.csv" "nonexistent.zip:: nonexistentfile.csv" "SETTINGS allow_archive_path_syntax=0" diff --git a/tests/queries/0_stateless/03215_parsing_archive_name_s3.reference b/tests/queries/0_stateless/03215_parsing_archive_name_s3.reference index 9dd925a7480..b4804c82dc2 100644 --- a/tests/queries/0_stateless/03215_parsing_archive_name_s3.reference +++ b/tests/queries/0_stateless/03215_parsing_archive_name_s3.reference @@ -1,2 +1,3 @@ ::03215_archive.csv test/::03215_archive.csv test::03215_archive.csv test/test::03215_archive.csv +test.zip::03215_archive.csv test/test.zip::03215_archive.csv diff --git a/tests/queries/0_stateless/03215_parsing_archive_name_s3.sql b/tests/queries/0_stateless/03215_parsing_archive_name_s3.sql index 9d01f53c838..3a7ed0b864c 100644 --- a/tests/queries/0_stateless/03215_parsing_archive_name_s3.sql +++ b/tests/queries/0_stateless/03215_parsing_archive_name_s3.sql @@ -4,3 +4,4 @@ SELECT _file, _path FROM s3(s3_conn, filename='::03215_archive.csv') ORDER BY (_file, _path); SELECT _file, _path FROM s3(s3_conn, filename='test :: 03215_archive.csv') ORDER BY (_file, _path); -- { serverError STD_EXCEPTION } SELECT _file, _path FROM s3(s3_conn, filename='test::03215_archive.csv') ORDER BY (_file, _path); +SELECT _file, _path FROM s3(s3_conn, filename='test.zip::03215_archive.csv') ORDER BY (_file, _path) SETTINGS allow_archive_path_syntax=0; diff --git a/tests/queries/0_stateless/data_minio/test.zip::03215_archive.csv b/tests/queries/0_stateless/data_minio/test.zip::03215_archive.csv new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/0_stateless/data_minio/test.zip::03215_archive.csv @@ -0,0 +1 @@ +1 From b1e80883f0324995d84250d5edf37fd8ab475987 Mon Sep 17 00:00:00 2001 From: Nikita Fomichev Date: Tue, 30 Jul 2024 11:39:22 +0200 Subject: [PATCH 449/661] `accept_invalid_certificate` in client config: additional testing #65238 --- .../configs/ssl_config_strict.xml | 17 +++++++++ .../test_accept_invalid_certificate/test.py | 35 +++++++++++++++++++ 2 files changed, 52 insertions(+) create mode 100644 tests/integration/test_accept_invalid_certificate/configs/ssl_config_strict.xml diff --git a/tests/integration/test_accept_invalid_certificate/configs/ssl_config_strict.xml b/tests/integration/test_accept_invalid_certificate/configs/ssl_config_strict.xml new file mode 100644 index 00000000000..a4383a77ac4 --- /dev/null +++ b/tests/integration/test_accept_invalid_certificate/configs/ssl_config_strict.xml @@ -0,0 +1,17 @@ + + + 9440 + + + + + /etc/clickhouse-server/config.d/self-cert.pem + /etc/clickhouse-server/config.d/self-key.pem + /etc/clickhouse-server/config.d/ca-cert.pem + strict + + + diff --git 
a/tests/integration/test_accept_invalid_certificate/test.py b/tests/integration/test_accept_invalid_certificate/test.py index 87229d75f90..f43e9e6140a 100644 --- a/tests/integration/test_accept_invalid_certificate/test.py +++ b/tests/integration/test_accept_invalid_certificate/test.py @@ -17,6 +17,19 @@ instance = cluster.add_instance( "certs/self-cert.pem", "certs/ca-cert.pem", ], + with_zookeeper=False, +) + + +node1 = cluster.add_instance( + "node1", + main_configs=[ + "configs/ssl_config_strict.xml", + "certs/self-key.pem", + "certs/self-cert.pem", + "certs/ca-cert.pem", + ], + with_zookeeper=False, ) @@ -90,3 +103,25 @@ def test_connection_accept(): ) == "1\n" ) + + +def test_strict_reject(): + with pytest.raises(Exception) as err: + execute_query_native(node1, "SELECT 1", "") + assert "certificate verify failed" in str(err.value) + + +def test_strict_reject_with_config(): + with pytest.raises(Exception) as err: + execute_query_native(node1, "SELECT 1", config_accept) + assert "alert certificate required" in str(err.value) + + +def test_strict_connection_reject(): + with pytest.raises(Exception) as err: + execute_query_native( + node1, + "SELECT 1", + config_connection_accept.format(ip_address=f"{instance.ip_address}"), + ) + assert "certificate verify failed" in str(err.value) From bdf98cbcc0121ab94dd1db39fc5cf977a7ed42ad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Tue, 30 Jul 2024 10:06:01 +0000 Subject: [PATCH 450/661] Fix public backports --- tests/ci/cherry_pick.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ci/cherry_pick.py b/tests/ci/cherry_pick.py index 0b2aa9a2d35..b660ad2c040 100644 --- a/tests/ci/cherry_pick.py +++ b/tests/ci/cherry_pick.py @@ -505,7 +505,7 @@ class Backport: ReleaseBranch( ( br - if self._repo_name == "ClickHouse/Clickhouse" + if self._repo_name == "ClickHouse/ClickHouse" else f"release/{br}" ), pr, From 9a05a3ed9e7cccada42f49a0cd5c3896010f9edb Mon Sep 17 00:00:00 2001 From: Nikita Fomichev Date: Tue, 30 Jul 2024 12:07:34 +0200 Subject: [PATCH 451/661] Add missing documentation for `groupConcat` after series of reverts and merges https://github.com/ClickHouse/ClickHouse/pull/65384 --- .../reference/groupconcat.md | 90 +++++++++++++++++++ .../aspell-ignore/en/aspell-dict.txt | 2 + 2 files changed, 92 insertions(+) create mode 100644 docs/en/sql-reference/aggregate-functions/reference/groupconcat.md diff --git a/docs/en/sql-reference/aggregate-functions/reference/groupconcat.md b/docs/en/sql-reference/aggregate-functions/reference/groupconcat.md new file mode 100644 index 00000000000..072252de8c9 --- /dev/null +++ b/docs/en/sql-reference/aggregate-functions/reference/groupconcat.md @@ -0,0 +1,90 @@ +--- +slug: /en/sql-reference/aggregate-functions/reference/groupconcat +sidebar_position: 363 +sidebar_label: groupConcat +title: groupConcat +--- + +Calculates a concatenated string from a group of strings, optionally separated by a delimiter, and optionally limited by a maximum number of elements. + +**Syntax** + +``` sql +groupConcat(expression [, delimiter] [, limit]); +``` + +**Arguments** + +- `expression` — The expression or column name that outputs strings to be concatenated.. +- `delimiter` — A [string](../../../sql-reference/data-types/string.md) that will be used to separate concatenated values. This parameter is optional and defaults to an empty string if not specified. 
+- `limit` — A positive [integer](../../../sql-reference/data-types/int-uint.md) specifying the maximum number of elements to concatenate. If more elements are present, excess elements are ignored. This parameter is optional. + +:::note +If delimiter is specified without limit, it must be the first parameter following the expression. If both delimiter and limit are specified, delimiter must precede limit. +::: + +**Returned value** + +- Returns a [string](../../../sql-reference/data-types/string.md) consisting of the concatenated values of the column or expression. If the group has no elements or only null elements, and the function does not specify a handling for only null values, the result is a nullable string with a null value. + +**Examples** + +Input table: + +``` text +┌─id─┬─name─┐ +│ 1 │ John│ +│ 2 │ Jane│ +│ 3 │ Bob│ +└────┴──────┘ +``` + +1. Basic usage without a delimiter: + +Query: + +``` sql +SELECT groupConcat(Name) FROM Employees; +``` + +Result: + +``` text +JohnJaneBob +``` + +This concatenates all names into one continuous string without any separator. + + +2. Using comma as a delimiter: + +Query: + +``` sql +SELECT groupConcat(Name, ', ') FROM Employees; +``` + +Result: + +``` text +John, Jane, Bob +``` + +This output shows the names separated by a comma followed by a space. + + +3. Limiting the number of concatenated elements + +Query: + +``` sql +SELECT groupConcat(Name, ', ', 2) FROM Employees; +``` + +Result: + +``` text +John, Jane +``` + +This query limits the output to the first two names, even though there are more names in the table. diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index b21ae0764c6..d82b70cfdb4 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -1733,6 +1733,7 @@ groupBitmap groupBitmapAnd groupBitmapOr groupBitmapXor +groupConcat groupUniqArray grouparray grouparrayinsertat @@ -1749,6 +1750,7 @@ groupbitmapor groupbitmapxor groupbitor groupbitxor +groupconcat groupuniqarray grpc grpcio From fb466287dac16801518547f34b42edbc16a57fae Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Tue, 30 Jul 2024 12:13:47 +0200 Subject: [PATCH 452/661] Update 02150_index_hypothesis_race_long.sh --- tests/queries/0_stateless/02150_index_hypothesis_race_long.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02150_index_hypothesis_race_long.sh b/tests/queries/0_stateless/02150_index_hypothesis_race_long.sh index be7cfa78492..c29b604d23d 100755 --- a/tests/queries/0_stateless/02150_index_hypothesis_race_long.sh +++ b/tests/queries/0_stateless/02150_index_hypothesis_race_long.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-distributed-cache +# Tags: long, no-distributed-cache CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh From d7534c5b40c315cdb3d52101f626dbee7d565cdb Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 30 Jul 2024 12:20:54 +0200 Subject: [PATCH 453/661] Update 03210_optimize_rewrite_aggregate_function_with_if_return_type_bug.sql --- ...imize_rewrite_aggregate_function_with_if_return_type_bug.sql | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/queries/0_stateless/03210_optimize_rewrite_aggregate_function_with_if_return_type_bug.sql b/tests/queries/0_stateless/03210_optimize_rewrite_aggregate_function_with_if_return_type_bug.sql index 
b620a6434bf..565a481940a 100644 --- a/tests/queries/0_stateless/03210_optimize_rewrite_aggregate_function_with_if_return_type_bug.sql +++ b/tests/queries/0_stateless/03210_optimize_rewrite_aggregate_function_with_if_return_type_bug.sql @@ -1,3 +1,5 @@ +SET allow_experimental_analyzer = 1; + -- For function count, rewrite countState to countStateIf changes the type from AggregateFunction(count, Nullable(UInt64)) to AggregateFunction(count, UInt64) -- We can cast AggregateFunction(count, UInt64) back to AggregateFunction(count, Nullable(UInt64)) with additional _CAST select hex(countState(if(toNullable(number % 2 = 0), number, null))) from numbers(5) settings optimize_rewrite_aggregate_function_with_if=1; From 6ef628a7c80ebd6ec727365ee69d8141a4f11400 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 30 Jul 2024 10:33:22 +0000 Subject: [PATCH 454/661] Fixing build. --- src/Processors/QueryPlan/DistinctStep.cpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/Processors/QueryPlan/DistinctStep.cpp b/src/Processors/QueryPlan/DistinctStep.cpp index a481454139d..b1c24fc01ce 100644 --- a/src/Processors/QueryPlan/DistinctStep.cpp +++ b/src/Processors/QueryPlan/DistinctStep.cpp @@ -10,6 +10,11 @@ namespace DB { +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + static ITransformingStep::Traits getTraits(bool pre_distinct) { const bool preserves_number_of_streams = pre_distinct; @@ -90,7 +95,8 @@ void DistinctStep::transformPipeline(QueryPipelineBuilder & pipeline, const Buil /// final distinct for sorted stream (sorting inside and among chunks) if (input_stream.sort_scope == DataStream::SortScope::Global) { - assert(input_stream.has_single_port); + if (pipeline.getNumStreams() != 1) + throw Exception(ErrorCodes::LOGICAL_ERROR, "DistinctStep with in-order expects single input"); if (distinct_sort_desc.size() < columns.size()) { From 0f8feff4d3806fa6f81d24184ab68bcd6e727551 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Tue, 30 Jul 2024 12:34:30 +0200 Subject: [PATCH 455/661] Add KeeperMap retries --- src/Storages/StorageKeeperMap.cpp | 132 +++++++++++++++++------------- 1 file changed, 73 insertions(+), 59 deletions(-) diff --git a/src/Storages/StorageKeeperMap.cpp b/src/Storages/StorageKeeperMap.cpp index ef157239e26..5534bb7f346 100644 --- a/src/Storages/StorageKeeperMap.cpp +++ b/src/Storages/StorageKeeperMap.cpp @@ -35,6 +35,7 @@ #include #include +#include "Common/ZooKeeper/ZooKeeperRetries.h" #include #include #include @@ -120,7 +121,7 @@ public: : SinkToStorage(header), storage(storage_), context(std::move(context_)) { auto primary_key = storage.getPrimaryKey(); - assert(primary_key.size() == 1); + chassert(primary_key.size() == 1); primary_key_pos = getHeader().getPositionByName(primary_key[0]); } @@ -171,76 +172,89 @@ public: template void finalize(bool strict) { - auto zookeeper = storage.getClient(); + const auto & settings = context->getSettingsRef(); - auto keys_limit = storage.keysLimit(); + ZooKeeperRetriesControl zk_retry{ + getName(), + getLogger(getName()), + ZooKeeperRetriesInfo{ + settings.insert_keeper_max_retries, + settings.insert_keeper_retry_initial_backoff_ms, + settings.insert_keeper_retry_max_backoff_ms}, + context->getProcessListElement()}; - size_t current_keys_num = 0; - size_t new_keys_num = 0; - - // We use keys limit as a soft limit so we ignore some cases when it can be still exceeded - // (e.g if parallel insert queries are being run) - if (keys_limit != 0) + retries_ctl.retryLoop([&]() { - Coordination::Stat 
data_stat; - zookeeper->get(storage.dataPath(), &data_stat); - current_keys_num = data_stat.numChildren; - } + auto zookeeper = storage.getClient(); + auto keys_limit = storage.keysLimit(); - std::vector key_paths; - key_paths.reserve(new_values.size()); - for (const auto & [key, _] : new_values) - key_paths.push_back(storage.fullPathForKey(key)); + size_t current_keys_num = 0; + size_t new_keys_num = 0; - zkutil::ZooKeeper::MultiExistsResponse results; - - if constexpr (!for_update) - { - if (!strict) - results = zookeeper->exists(key_paths); - } - - Coordination::Requests requests; - requests.reserve(key_paths.size()); - for (size_t i = 0; i < key_paths.size(); ++i) - { - auto key = fs::path(key_paths[i]).filename(); - - if constexpr (for_update) + // We use keys limit as a soft limit so we ignore some cases when it can be still exceeded + // (e.g if parallel insert queries are being run) + if (keys_limit != 0) { - int32_t version = -1; - if (strict) - version = versions.at(key); - - requests.push_back(zkutil::makeSetRequest(key_paths[i], new_values[key], version)); + Coordination::Stat data_stat; + zookeeper->get(storage.dataPath(), &data_stat); + current_keys_num = data_stat.numChildren; } - else + + std::vector key_paths; + key_paths.reserve(new_values.size()); + for (const auto & [key, _] : new_values) + key_paths.push_back(storage.fullPathForKey(key)); + + zkutil::ZooKeeper::MultiExistsResponse results; + + if constexpr (!for_update) { - if (!strict && results[i].error == Coordination::Error::ZOK) + if (!strict) + results = zookeeper->exists(key_paths); + } + + Coordination::Requests requests; + requests.reserve(key_paths.size()); + for (size_t i = 0; i < key_paths.size(); ++i) + { + auto key = fs::path(key_paths[i]).filename(); + + if constexpr (for_update) { - requests.push_back(zkutil::makeSetRequest(key_paths[i], new_values[key], -1)); + int32_t version = -1; + if (strict) + version = versions.at(key); + + requests.push_back(zkutil::makeSetRequest(key_paths[i], new_values[key], version)); } else { - requests.push_back(zkutil::makeCreateRequest(key_paths[i], new_values[key], zkutil::CreateMode::Persistent)); - ++new_keys_num; + if (!strict && results[i].error == Coordination::Error::ZOK) + { + requests.push_back(zkutil::makeSetRequest(key_paths[i], new_values[key], -1)); + } + else + { + requests.push_back(zkutil::makeCreateRequest(key_paths[i], new_values[key], zkutil::CreateMode::Persistent)); + ++new_keys_num; + } } } - } - if (new_keys_num != 0) - { - auto will_be = current_keys_num + new_keys_num; - if (keys_limit != 0 && will_be > keys_limit) - throw Exception( - ErrorCodes::LIMIT_EXCEEDED, - "Limit would be exceeded by inserting {} new key(s). Limit is {}, while the number of keys would be {}", - new_keys_num, - keys_limit, - will_be); - } + if (new_keys_num != 0) + { + auto will_be = current_keys_num + new_keys_num; + if (keys_limit != 0 && will_be > keys_limit) + throw Exception( + ErrorCodes::LIMIT_EXCEEDED, + "Limit would be exceeded by inserting {} new key(s). 
Limit is {}, while the number of keys would be {}", + new_keys_num, + keys_limit, + will_be); + } - zookeeper->multi(requests, /* check_session_valid */ true); + zookeeper->multi(requests, /* check_session_valid */ true); + }); } }; @@ -529,8 +543,8 @@ Pipe StorageKeeperMap::read( size_t num_keys = keys->size(); size_t num_threads = std::min(num_streams, keys->size()); - assert(num_keys <= std::numeric_limits::max()); - assert(num_threads <= std::numeric_limits::max()); + chassert(num_keys <= std::numeric_limits::max()); + chassert(num_threads <= std::numeric_limits::max()); for (size_t thread_idx = 0; thread_idx < num_threads; ++thread_idx) { @@ -1160,7 +1174,7 @@ void StorageKeeperMap::mutate(const MutationCommands & commands, ContextPtr loca bool strict = local_context->getSettingsRef().keeper_map_strict_mode; - assert(commands.size() == 1); + chassert(commands.size() == 1); auto metadata_snapshot = getInMemoryMetadataPtr(); auto storage = getStorageID(); @@ -1236,7 +1250,7 @@ void StorageKeeperMap::mutate(const MutationCommands & commands, ContextPtr loca return; } - assert(commands.front().type == MutationCommand::Type::UPDATE); + chassert(commands.front().type == MutationCommand::Type::UPDATE); if (commands.front().column_to_update_expression.contains(primary_key)) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Primary key cannot be updated (cannot update column {})", primary_key); From 2530c5eb41a759baded5380a4e697c2e884c0abd Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Tue, 30 Jul 2024 12:37:35 +0200 Subject: [PATCH 456/661] Fix tests --- tests/queries/0_stateless/02952_archive_parsing.reference | 0 tests/queries/0_stateless/02952_archive_parsing.sql | 1 - tests/queries/0_stateless/03214_parsing_archive_name_file.sh | 2 +- 3 files changed, 1 insertion(+), 2 deletions(-) delete mode 100644 tests/queries/0_stateless/02952_archive_parsing.reference delete mode 100644 tests/queries/0_stateless/02952_archive_parsing.sql diff --git a/tests/queries/0_stateless/02952_archive_parsing.reference b/tests/queries/0_stateless/02952_archive_parsing.reference deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/tests/queries/0_stateless/02952_archive_parsing.sql b/tests/queries/0_stateless/02952_archive_parsing.sql deleted file mode 100644 index 49b0223e6ec..00000000000 --- a/tests/queries/0_stateless/02952_archive_parsing.sql +++ /dev/null @@ -1 +0,0 @@ -SELECT * FROM file('::a'); -- { serverError BAD_ARGUMENTS } diff --git a/tests/queries/0_stateless/03214_parsing_archive_name_file.sh b/tests/queries/0_stateless/03214_parsing_archive_name_file.sh index 2f77627f6be..b54cbb10aa6 100755 --- a/tests/queries/0_stateless/03214_parsing_archive_name_file.sh +++ b/tests/queries/0_stateless/03214_parsing_archive_name_file.sh @@ -11,7 +11,7 @@ function try_to_read_file() settings=$3 echo $file_argument $settings - $CLICKHOUSE_LOCAL -q "SELECT * FROM file('$file_argument') $settings" 2>&1 | rg -c "Cannot stat file.*$file_to_read" + $CLICKHOUSE_LOCAL -q "SELECT * FROM file('$file_argument') $settings" 2>&1 | grep -c "Cannot stat file.*$file_to_read" } # if archive extension is not detected for part before '::', path is taken as is From 27a15bc5ace68acb0ba62791ca6e3d3f17ae569c Mon Sep 17 00:00:00 2001 From: avogar Date: Tue, 30 Jul 2024 10:39:35 +0000 Subject: [PATCH 457/661] Remove sh files --- .../03036_dynamic_read_subcolumns_1.sh | 19 ------------------- .../03036_dynamic_read_subcolumns_2.sh | 19 ------------------- .../03036_dynamic_read_subcolumns_3.sh | 19 ------------------- 3 
files changed, 57 deletions(-) delete mode 100755 tests/queries/0_stateless/03036_dynamic_read_subcolumns_1.sh delete mode 100755 tests/queries/0_stateless/03036_dynamic_read_subcolumns_2.sh delete mode 100755 tests/queries/0_stateless/03036_dynamic_read_subcolumns_3.sh diff --git a/tests/queries/0_stateless/03036_dynamic_read_subcolumns_1.sh b/tests/queries/0_stateless/03036_dynamic_read_subcolumns_1.sh deleted file mode 100755 index ed548ae74e9..00000000000 --- a/tests/queries/0_stateless/03036_dynamic_read_subcolumns_1.sh +++ /dev/null @@ -1,19 +0,0 @@ -#!/usr/bin/env bash -# Tags: long - -CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# shellcheck source=../shell_config.sh -. "$CUR_DIR"/../shell_config.sh - - -# shellcheck source=./03036_dynamic_read_subcolumns.lib -. "$CUR_DIR"/03036_dynamic_read_subcolumns.lib - -CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --use_variant_as_common_type=1 --allow_experimental_dynamic_type=1" - -$CH_CLIENT -q "drop table if exists test;" - -echo "Memory" -$CH_CLIENT -q "create table test (id UInt64, d Dynamic) engine=Memory" -test -$CH_CLIENT -q "drop table test;" diff --git a/tests/queries/0_stateless/03036_dynamic_read_subcolumns_2.sh b/tests/queries/0_stateless/03036_dynamic_read_subcolumns_2.sh deleted file mode 100755 index 95dafcf5832..00000000000 --- a/tests/queries/0_stateless/03036_dynamic_read_subcolumns_2.sh +++ /dev/null @@ -1,19 +0,0 @@ -#!/usr/bin/env bash -# Tags: long - -CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# shellcheck source=../shell_config.sh -. "$CUR_DIR"/../shell_config.sh - - -# shellcheck source=./03036_dynamic_read_subcolumns.lib -. "$CUR_DIR"/03036_dynamic_read_subcolumns.lib - -CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --use_variant_as_common_type=1 --allow_experimental_dynamic_type=1" - -$CH_CLIENT -q "drop table if exists test;" - -echo "MergeTree compact" -$CH_CLIENT -q "create table test (id UInt64, d Dynamic) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000;" -test -$CH_CLIENT -q "drop table test;" diff --git a/tests/queries/0_stateless/03036_dynamic_read_subcolumns_3.sh b/tests/queries/0_stateless/03036_dynamic_read_subcolumns_3.sh deleted file mode 100755 index a3c2d93e568..00000000000 --- a/tests/queries/0_stateless/03036_dynamic_read_subcolumns_3.sh +++ /dev/null @@ -1,19 +0,0 @@ -#!/usr/bin/env bash -# Tags: long - -CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# shellcheck source=../shell_config.sh -. "$CUR_DIR"/../shell_config.sh - - -# shellcheck source=./03036_dynamic_read_subcolumns.lib -. 
"$CUR_DIR"/03036_dynamic_read_subcolumns.lib - -CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --use_variant_as_common_type=1 --allow_experimental_dynamic_type=1" - -$CH_CLIENT -q "drop table if exists test;" - -echo "MergeTree wide" -$CH_CLIENT -q "create table test (id UInt64, d Dynamic) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1;" -test -$CH_CLIENT -q "drop table test;" From 72d6467fd2c34a82e1ef8ac73a451240843279a6 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 30 Jul 2024 10:43:21 +0000 Subject: [PATCH 458/661] Bump Azure to 1.13 --- contrib/azure | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/azure b/contrib/azure index ea3e19a7be0..67272b7ee0a 160000 --- a/contrib/azure +++ b/contrib/azure @@ -1 +1 @@ -Subproject commit ea3e19a7be08519134c643177d56c7484dfec884 +Subproject commit 67272b7ee0adff6b69921b26eb071ba1a353062c From d4537d91875d1ecec832af94fde15073c45a63d7 Mon Sep 17 00:00:00 2001 From: Nikita Fomichev Date: Tue, 30 Jul 2024 12:07:47 +0200 Subject: [PATCH 459/661] Add `groupConcat` to fuzzer https://github.com/ClickHouse/ClickHouse/pull/65384 --- tests/fuzz/dictionaries/functions.dict | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/fuzz/dictionaries/functions.dict b/tests/fuzz/dictionaries/functions.dict index ec7f8017fb2..6f2a88c22fa 100644 --- a/tests/fuzz/dictionaries/functions.dict +++ b/tests/fuzz/dictionaries/functions.dict @@ -1588,6 +1588,7 @@ "groupBitmapXorResample" "groupBitmapXorSimpleState" "groupBitmapXorState" +"groupConcat" "groupUniqArray" "groupUniqArrayArgMax" "groupUniqArrayArgMin" From d4d3d590e38436da44b13dbf11a92cc6d00863e7 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Mon, 29 Jul 2024 20:06:55 +0000 Subject: [PATCH 460/661] Remove obsolete --multiquery parameter from tests --- .../integration/test_distributed_format/test.py | 8 ++++---- ...80_client_break_at_exception_in_batch_mode.sh | 2 +- .../00463_long_sessions_in_http_interface.sh | 10 +++++----- .../0_stateless/00474_readonly_settings.sh | 4 ++-- .../0_stateless/00612_pk_in_tuple_perf.sh | 4 ++-- .../0_stateless/00630_arbitrary_csv_delimiter.sh | 6 +++--- .../00650_csv_with_specified_quote_rule.sh | 4 ++-- ...00651_default_database_on_client_reconnect.sh | 2 +- .../00652_mutations_default_database.sh | 2 +- ...cated_mutations_default_database_zookeeper.sh | 2 +- .../00699_materialized_view_mutations.sh | 8 ++++---- .../00704_drop_truncate_memory_table.sh | 6 +++--- .../0_stateless/00705_drop_create_merge_tree.sh | 4 ++-- .../0_stateless/00763_lock_buffer_long.sh | 4 ++-- ...3_long_lock_buffer_alter_destination_table.sh | 4 ++-- .../00825_protobuf_format_array_3dim.sh | 2 +- .../00825_protobuf_format_array_of_arrays.sh | 2 +- .../00825_protobuf_format_enum_mapping.sh | 2 +- .../0_stateless/00825_protobuf_format_map.sh | 2 +- .../00825_protobuf_format_nested_in_nested.sh | 2 +- .../00825_protobuf_format_nested_optional.sh | 2 +- .../00825_protobuf_format_no_length_delimiter.sh | 6 +++--- .../0_stateless/00825_protobuf_format_persons.sh | 4 ++-- ...5_protobuf_format_skipped_column_in_nested.sh | 2 +- .../00825_protobuf_format_splitted_nested.sh | 2 +- .../0_stateless/00825_protobuf_format_squares.sh | 2 +- .../00825_protobuf_format_table_default.sh | 2 +- .../0_stateless/00900_long_parquet_load.sh | 2 +- .../0_stateless/00900_orc_arrow_parquet_maps.sh | 2 +- .../00937_format_schema_rows_template.sh | 4 ++-- .../0_stateless/00956_sensitive_data_masking.sh | 12 ++++++------ 
.../01019_alter_materialized_view_atomic.sh | 2 +- .../01019_alter_materialized_view_consistent.sh | 2 +- .../0_stateless/01035_lc_empty_part_bug.sh | 4 ++-- ...ystem_reload_dictionary_reloads_completely.sh | 2 +- .../01052_window_view_proc_tumble_to_now.sh | 2 +- .../01053_window_view_proc_hop_to_now.sh | 2 +- .../01054_window_view_proc_tumble_to.sh | 2 +- .../0_stateless/01055_window_view_proc_hop_to.sh | 2 +- ...057_window_view_event_tumble_to_strict_asc.sh | 2 +- .../01058_window_view_event_hop_to_strict_asc.sh | 2 +- .../01060_window_view_event_tumble_to_asc.sh | 2 +- .../01061_window_view_event_hop_to_asc.sh | 2 +- .../01063_window_view_event_tumble_to_bounded.sh | 2 +- .../01064_window_view_event_hop_to_bounded.sh | 2 +- ...w_view_event_tumble_to_strict_asc_lateness.sh | 2 +- ...7_window_view_event_tumble_to_asc_lateness.sh | 2 +- ...ndow_view_event_tumble_to_bounded_lateness.sh | 2 +- .../01071_window_view_event_tumble_asc_join.sh | 2 +- ...01072_window_view_multiple_columns_groupby.sh | 2 +- ...3_window_view_event_tumble_to_asc_populate.sh | 4 ++-- ...window_view_event_tumble_asc_join_populate.sh | 2 +- ...75_window_view_proc_tumble_to_now_populate.sh | 2 +- .../01076_window_view_alter_query_to.sh | 4 ++-- ...7_window_view_alter_query_to_modify_source.sh | 4 ++-- ...1079_window_view_inner_table_memory_tumble.sh | 2 +- .../01080_window_view_inner_table_memory_hop.sh | 2 +- .../01081_window_view_target_table_engine.sh | 2 +- .../0_stateless/01083_window_view_select.sh | 2 +- .../01084_window_view_with_table_identifier.sh | 2 +- .../0_stateless/01086_window_view_cleanup.sh | 2 +- .../0_stateless/01087_window_view_alter_query.sh | 4 ++-- .../01088_window_view_default_column.sh | 2 +- .../0_stateless/01133_begin_commit_race.sh | 6 +++--- .../01169_alter_partition_isolation_stress.sh | 4 ++-- ...01169_old_alter_partition_isolation_stress.sh | 10 +++++----- .../01171_mv_select_insert_isolation_long.sh | 14 +++++++------- .../0_stateless/01174_select_insert_isolation.sh | 8 ++++---- .../0_stateless/01198_client_quota_key.sh | 2 +- .../0_stateless/01285_engine_join_donmikel.sh | 4 ++-- .../0_stateless/01293_optimize_final_force.sh | 2 +- .../queries/0_stateless/01304_direct_io_long.sh | 4 ++-- ...orage_file_tsv_csv_with_names_write_prefix.sh | 4 ++-- .../0_stateless/01443_merge_truncate_long.sh | 2 +- .../01527_clickhouse_local_optimize.sh | 2 +- .../01543_avro_deserialization_with_lc.sh | 2 +- .../0_stateless/01544_file_engine_settings.sh | 4 ++-- .../0_stateless/01600_detach_permanently.sh | 2 +- .../01600_parts_states_metrics_long.sh | 2 +- tests/queries/0_stateless/01606_git_import.sh | 4 ++-- .../0_stateless/01607_arrays_as_nested_csv.sh | 4 ++-- .../0_stateless/01632_tinylog_read_write.sh | 4 ++-- .../01658_read_file_to_stringcolumn.sh | 4 ++-- .../01666_merge_tree_max_query_limit.sh | 2 +- .../0_stateless/01747_system_session_log_long.sh | 6 +++--- .../01801_nullable_low_cardinality_tsv.sh | 2 +- .../01834_alias_columns_laziness_filimonov.sh | 2 +- .../01923_network_receive_time_metric_insert.sh | 4 ++-- .../01939_network_receive_bytes_metrics.sh | 4 ++-- .../01946_test_wrong_host_name_access.sh | 2 +- ..._with_escape_sequence_at_the_end_of_buffer.sh | 2 +- tests/queries/0_stateless/02009_from_infile.sh | 2 +- .../0_stateless/02024_compression_in_query.sh | 6 +++--- .../02048_parallel_reading_from_infile.sh | 6 +++--- ...02104_clickhouse_local_columns_description.sh | 2 +- ...insert_deduplication_token_multiple_blocks.sh | 10 +++++----- 
...eduplication_token_multiple_blocks_replica.sh | 10 +++++----- .../queries/0_stateless/02125_many_mutations.sh | 6 +++--- .../0_stateless/02125_many_mutations_2.sh | 6 +++--- .../queries/0_stateless/02135_local_create_db.sh | 2 +- .../0_stateless/02151_client_option_echo.sh | 4 ++-- .../0_stateless/02151_hash_table_sizes_stats.sh | 2 +- .../02151_hash_table_sizes_stats_distributed.sh | 2 +- .../02158_explain_ast_alter_commands.sh | 2 +- .../02206_clickhouse_local_use_database.sh | 2 +- .../02226_filesystem_cache_profile_events.sh | 14 +++++++------- .../02227_test_create_empty_sqlite_db.sh | 4 ++-- .../0_stateless/02235_remote_fs_cache_stress.sh | 6 +++--- .../02240_protobuflist_format_persons.sh | 4 ++-- .../02246_clickhouse_local_drop_database.sh | 4 ++-- .../0_stateless/02286_drop_filesystem_cache.sh | 2 +- .../02337_drop_filesystem_cache_access.sh | 8 ++++---- .../0_stateless/02364_window_view_segfault.sh | 2 +- .../0_stateless/02373_datetime64_monotonicity.sh | 2 +- .../0_stateless/02416_rename_database_rbac.sh | 4 ++-- .../02435_rollback_cancelled_queries.sh | 2 +- ...60_projections_and_aggregate_null_if_empty.sh | 2 +- .../02494_query_cache_user_isolation.sh | 8 ++++---- ...503_cache_on_write_with_small_segment_size.sh | 4 ++-- .../queries/0_stateless/02521_merge_over_gap.sh | 2 +- .../02530_dictionaries_update_field.sh | 2 +- .../0_stateless/02702_allow_skip_errors_enum.sh | 2 +- .../0_stateless/02704_keeper_map_zk_nodes.sh | 2 +- ...ckhouse_local_implicit_file_table_function.sh | 4 ++-- .../02712_bool_better_exception_message.sh | 8 ++++---- .../0_stateless/02722_database_filesystem.sh | 8 ++++---- tests/queries/0_stateless/02724_database_s3.sh | 14 +++++++------- tests/queries/0_stateless/02725_database_hdfs.sh | 10 +++++----- .../0_stateless/02725_local_query_parameters.sh | 2 +- .../02751_multiquery_with_argument.reference | 2 -- .../02751_multiquery_with_argument.sh | 16 ++++++---------- .../02815_no_throw_in_simple_queries.sh | 2 +- .../02843_insertion_table_schema_infer.sh | 2 +- .../02864_restore_table_with_broken_part.sh | 2 +- ...ultiple_batches_array_inconsistent_offsets.sh | 2 +- .../0_stateless/02875_merge_engine_set_index.sh | 2 +- .../02877_optimize_read_in_order_from_view.sh | 2 +- ...02884_create_view_with_sql_security_option.sh | 16 ++++++++-------- ...885_async_insert_access_check_for_defaults.sh | 4 ++-- ...900_clickhouse_local_drop_current_database.sh | 2 +- .../02956_clickhouse_local_system_parts.sh | 2 +- .../02973_backup_of_in_memory_compressed.sh | 10 +++++----- .../02973_parse_crlf_with_tsv_files.sh | 2 +- .../0_stateless/02995_forget_partition.sh | 4 ++-- tests/queries/0_stateless/02995_index_1.sh | 6 +++--- tests/queries/0_stateless/02995_index_10.sh | 6 +++--- tests/queries/0_stateless/02995_index_2.sh | 6 +++--- tests/queries/0_stateless/02995_index_3.sh | 6 +++--- tests/queries/0_stateless/02995_index_4.sh | 6 +++--- tests/queries/0_stateless/02995_index_5.sh | 6 +++--- tests/queries/0_stateless/02995_index_6.sh | 6 +++--- tests/queries/0_stateless/02995_index_7.sh | 6 +++--- tests/queries/0_stateless/02995_index_8.sh | 6 +++--- tests/queries/0_stateless/02995_index_9.sh | 6 +++--- .../0_stateless/02998_native_parquet_reader.sh | 2 +- .../03001_backup_matview_after_modify_query.sh | 2 +- .../03001_matview_columns_after_modify_query.sh | 2 +- .../03006_correct_revoke_for_partial_rights.sh | 2 +- .../03147_system_columns_access_checks.sh | 4 ++-- .../0_stateless/03201_local_named_collections.sh | 6 +++--- .../0_stateless/03212_thousand_exceptions.sh | 
2 +- 161 files changed, 316 insertions(+), 322 deletions(-) diff --git a/tests/integration/test_distributed_format/test.py b/tests/integration/test_distributed_format/test.py index 91afb8f7b34..5611f465e8b 100644 --- a/tests/integration/test_distributed_format/test.py +++ b/tests/integration/test_distributed_format/test.py @@ -55,7 +55,7 @@ def test_single_file(started_cluster, cluster): path = get_dist_path(cluster, "distr_1", 1) query = f"select * from file('{path}/1.bin', 'Distributed')" out = node.exec_in_container( - ["/usr/bin/clickhouse", "local", "--multiquery", "--stacktrace", "-q", query] + ["/usr/bin/clickhouse", "local", "--stacktrace", "-q", query] ) assert out == "1\ta\n2\tbb\n3\tccc\n" @@ -65,7 +65,7 @@ def test_single_file(started_cluster, cluster): select * from t; """ out = node.exec_in_container( - ["/usr/bin/clickhouse", "local", "--multiquery", "--stacktrace", "-q", query] + ["/usr/bin/clickhouse", "local", "--stacktrace", "-q", query] ) assert out == "1\ta\n2\tbb\n3\tccc\n" @@ -106,7 +106,7 @@ def test_two_files(started_cluster, cluster): select * from t order by x; """ out = node.exec_in_container( - ["/usr/bin/clickhouse", "local", "--multiquery", "--stacktrace", "-q", query] + ["/usr/bin/clickhouse", "local", "--stacktrace", "-q", query] ) assert out == "0\t_\n1\ta\n2\tbb\n3\tccc\n" @@ -141,7 +141,7 @@ def test_single_file_old(started_cluster, cluster): select * from t; """ out = node.exec_in_container( - ["/usr/bin/clickhouse", "local", "--multiquery", "--stacktrace", "-q", query] + ["/usr/bin/clickhouse", "local", "--stacktrace", "-q", query] ) assert out == "1\ta\n2\tbb\n3\tccc\n" diff --git a/tests/queries/0_stateless/00380_client_break_at_exception_in_batch_mode.sh b/tests/queries/0_stateless/00380_client_break_at_exception_in_batch_mode.sh index 62f891db33c..0aab52d15c2 100755 --- a/tests/queries/0_stateless/00380_client_break_at_exception_in_batch_mode.sh +++ b/tests/queries/0_stateless/00380_client_break_at_exception_in_batch_mode.sh @@ -5,4 +5,4 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh -$CLICKHOUSE_CLIENT --multiquery --query="SELECT 1; SELECT xyz; SELECT 2;" 2> /dev/null || true; +$CLICKHOUSE_CLIENT --query="SELECT 1; SELECT xyz; SELECT 2;" 2> /dev/null || true; diff --git a/tests/queries/0_stateless/00463_long_sessions_in_http_interface.sh b/tests/queries/0_stateless/00463_long_sessions_in_http_interface.sh index d41d6409315..6ee1649c9ed 100755 --- a/tests/queries/0_stateless/00463_long_sessions_in_http_interface.sh +++ b/tests/queries/0_stateless/00463_long_sessions_in_http_interface.sh @@ -25,7 +25,7 @@ ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&session_id=${CLICKHOUSE_DATABASE}_4&se ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&session_id=${CLICKHOUSE_DATABASE}_5&session_timeout=60" --data-binary "SELECT 1" echo "Sessions are local per user:" -${CLICKHOUSE_CLIENT} --multiquery --query "DROP USER IF EXISTS test_00463; CREATE USER test_00463; GRANT ALL ON *.* TO test_00463;" +${CLICKHOUSE_CLIENT} --query "DROP USER IF EXISTS test_00463; CREATE USER test_00463; GRANT ALL ON *.* TO test_00463;" ${CLICKHOUSE_CURL} -sS -X POST "${CLICKHOUSE_URL}&session_id=${CLICKHOUSE_DATABASE}_6&session_timeout=600" --data-binary "CREATE TEMPORARY TABLE t (s String)" ${CLICKHOUSE_CURL} -sS -X POST "${CLICKHOUSE_URL}&session_id=${CLICKHOUSE_DATABASE}_6" --data-binary "INSERT INTO t VALUES ('Hello')" @@ -37,7 +37,7 @@ ${CLICKHOUSE_CURL} -sS -X POST "${CLICKHOUSE_URL}&user=test_00463&session_id=${C ${CLICKHOUSE_CURL} -sS -X POST "${CLICKHOUSE_URL}&session_id=${CLICKHOUSE_DATABASE}_6" --data-binary "SELECT * FROM t" ${CLICKHOUSE_CURL} -sS -X POST "${CLICKHOUSE_URL}&user=test_00463&session_id=${CLICKHOUSE_DATABASE}_6" --data-binary "SELECT * FROM t" -${CLICKHOUSE_CLIENT} --multiquery --query "DROP USER test_00463"; +${CLICKHOUSE_CLIENT} --query "DROP USER test_00463"; echo "And cannot be accessed for a non-existent user:" ${CLICKHOUSE_CURL} -sS -X POST "${CLICKHOUSE_URL}&user=test_00463&session_id=${CLICKHOUSE_DATABASE}_6" --data-binary "SELECT * FROM t" | grep -c -F 'Exception' @@ -59,7 +59,7 @@ done echo "A session successfully expire after a timeout and the session's temporary table shadows the permanent table:" # An infinite loop is required to make the test reliable. 
We will check that the timeout corresponds to the observed time at least once -${CLICKHOUSE_CLIENT} --multiquery --query "DROP TABLE IF EXISTS t; CREATE TABLE t (s String) ENGINE = Memory; INSERT INTO t VALUES ('World');" +${CLICKHOUSE_CLIENT} --query "DROP TABLE IF EXISTS t; CREATE TABLE t (s String) ENGINE = Memory; INSERT INTO t VALUES ('World');" while true do ( @@ -70,7 +70,7 @@ do ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&session_id=${CLICKHOUSE_DATABASE}_8" --data-binary "SELECT * FROM t" ) | tr -d '\n' | grep -F 'HelloWorld' && break || sleep 1 done -${CLICKHOUSE_CLIENT} --multiquery --query "DROP TABLE t" +${CLICKHOUSE_CLIENT} --query "DROP TABLE t" echo "A session cannot be used by concurrent connections:" @@ -83,5 +83,5 @@ do done ${CLICKHOUSE_CURL} -sS -X POST "${CLICKHOUSE_URL}&session_id=${CLICKHOUSE_DATABASE}_9" --data-binary "SELECT 1" | grep -c -F 'SESSION_IS_LOCKED' -${CLICKHOUSE_CLIENT} --multiquery --query "KILL QUERY WHERE query_id = '${CLICKHOUSE_DATABASE}_9' SYNC FORMAT Null"; +${CLICKHOUSE_CLIENT} --query "KILL QUERY WHERE query_id = '${CLICKHOUSE_DATABASE}_9' SYNC FORMAT Null"; wait diff --git a/tests/queries/0_stateless/00474_readonly_settings.sh b/tests/queries/0_stateless/00474_readonly_settings.sh index 3a857d81a74..ed3558c6d7a 100755 --- a/tests/queries/0_stateless/00474_readonly_settings.sh +++ b/tests/queries/0_stateless/00474_readonly_settings.sh @@ -8,8 +8,8 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) $CLICKHOUSE_CLIENT --query="select toUInt64(pow(2, 62)) as value format JSON" --output_format_json_quote_64bit_integers=0 | grep value $CLICKHOUSE_CLIENT --query="select toUInt64(pow(2, 62)) as value format JSON" --output_format_json_quote_64bit_integers=1 | grep value -$CLICKHOUSE_CLIENT --readonly=1 --multiquery --query="set output_format_json_quote_64bit_integers=1 ; select toUInt64(pow(2, 63)) as value format JSON" --server_logs_file=/dev/null 2>&1 | grep -o -q 'value\|Cannot modify .* setting in readonly mode' && echo "OK" || echo "FAIL" -$CLICKHOUSE_CLIENT --readonly=1 --multiquery --query="set output_format_json_quote_64bit_integers=0 ; select toUInt64(pow(2, 63)) as value format JSON" --server_logs_file=/dev/null 2>&1 | grep -o -q 'value\|Cannot modify .* setting in readonly mode' && echo "OK" || echo "FAIL" +$CLICKHOUSE_CLIENT --readonly=1 --query="set output_format_json_quote_64bit_integers=1 ; select toUInt64(pow(2, 63)) as value format JSON" --server_logs_file=/dev/null 2>&1 | grep -o -q 'value\|Cannot modify .* setting in readonly mode' && echo "OK" || echo "FAIL" +$CLICKHOUSE_CLIENT --readonly=1 --query="set output_format_json_quote_64bit_integers=0 ; select toUInt64(pow(2, 63)) as value format JSON" --server_logs_file=/dev/null 2>&1 | grep -o -q 'value\|Cannot modify .* setting in readonly mode' && echo "OK" || echo "FAIL" ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&query=SELECT+toUInt64(pow(2,+63))+as+value+format+JSON&output_format_json_quote_64bit_integers=1" | grep value ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&query=SELECT+toUInt64(pow(2,+63))+as+value+format+JSON&output_format_json_quote_64bit_integers=0" | grep value diff --git a/tests/queries/0_stateless/00612_pk_in_tuple_perf.sh b/tests/queries/0_stateless/00612_pk_in_tuple_perf.sh index c8297635c43..7b2973669de 100755 --- a/tests/queries/0_stateless/00612_pk_in_tuple_perf.sh +++ b/tests/queries/0_stateless/00612_pk_in_tuple_perf.sh @@ -7,7 +7,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -$CLICKHOUSE_CLIENT --multiquery </dev/null ||: +${CLICKHOUSE_CLIENT} 
--ignore-error --query "DROP TABLE IF EXISTS tab_00651; CREATE TABLE tab_00651 (val UInt64) engine = Memory; SHOW CREATE TABLE tab_00651 format abcd; DESC tab_00651; DROP TABLE tab_00651;" 2>/dev/null ||: diff --git a/tests/queries/0_stateless/00652_mutations_default_database.sh b/tests/queries/0_stateless/00652_mutations_default_database.sh index eed45540f9b..577943bc3fd 100755 --- a/tests/queries/0_stateless/00652_mutations_default_database.sh +++ b/tests/queries/0_stateless/00652_mutations_default_database.sh @@ -4,7 +4,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -${CLICKHOUSE_CLIENT} --multiquery --mutations_sync=1 << EOF +${CLICKHOUSE_CLIENT} --mutations_sync=1 << EOF DROP TABLE IF EXISTS mutations; DROP TABLE IF EXISTS for_subquery; diff --git a/tests/queries/0_stateless/00652_replicated_mutations_default_database_zookeeper.sh b/tests/queries/0_stateless/00652_replicated_mutations_default_database_zookeeper.sh index 0ac5a2f748a..d4f6d3b290c 100755 --- a/tests/queries/0_stateless/00652_replicated_mutations_default_database_zookeeper.sh +++ b/tests/queries/0_stateless/00652_replicated_mutations_default_database_zookeeper.sh @@ -9,7 +9,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=./mergetree_mutations.lib . "$CURDIR"/mergetree_mutations.lib -${CLICKHOUSE_CLIENT} --allow_nondeterministic_mutations=1 --multiquery << EOF +${CLICKHOUSE_CLIENT} --allow_nondeterministic_mutations=1 << EOF DROP TABLE IF EXISTS mutations_r1; DROP TABLE IF EXISTS for_subquery; diff --git a/tests/queries/0_stateless/00699_materialized_view_mutations.sh b/tests/queries/0_stateless/00699_materialized_view_mutations.sh index a0f7db536dc..07ca9bc0f67 100755 --- a/tests/queries/0_stateless/00699_materialized_view_mutations.sh +++ b/tests/queries/0_stateless/00699_materialized_view_mutations.sh @@ -7,7 +7,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . "$CURDIR"/../shell_config.sh -${CLICKHOUSE_CLIENT} --multiquery --query=" +${CLICKHOUSE_CLIENT} --query=" DROP TABLE IF EXISTS view_00699; DROP TABLE IF EXISTS null_00699; @@ -20,14 +20,14 @@ SELECT count(), min(x), max(x) FROM view_00699; ALTER TABLE null_00699 DELETE WHERE x % 2 = 0;" --mutations_sync=1 -${CLICKHOUSE_CLIENT} --multiquery --query=" +${CLICKHOUSE_CLIENT} --query=" SELECT count(), min(x), max(x) FROM null_00699; SELECT count(), min(x), max(x) FROM view_00699; ALTER TABLE view_00699 DELETE WHERE x % 2 = 0; " --mutations_sync=1 -${CLICKHOUSE_CLIENT} --multiquery --query=" +${CLICKHOUSE_CLIENT} --query=" SELECT count(), min(x), max(x) FROM null_00699; SELECT count(), min(x), max(x) FROM view_00699; @@ -35,7 +35,7 @@ ALTER TABLE null_00699 DELETE WHERE x % 2 = 1; ALTER TABLE view_00699 DELETE WHERE x % 2 = 1; " --mutations_sync=1 -${CLICKHOUSE_CLIENT} --multiquery --query=" +${CLICKHOUSE_CLIENT} --query=" SELECT count(), min(x), max(x) FROM null_00699; SELECT count(), min(x), max(x) FROM view_00699; diff --git a/tests/queries/0_stateless/00704_drop_truncate_memory_table.sh b/tests/queries/0_stateless/00704_drop_truncate_memory_table.sh index e1540d1a25e..e40da11b893 100755 --- a/tests/queries/0_stateless/00704_drop_truncate_memory_table.sh +++ b/tests/queries/0_stateless/00704_drop_truncate_memory_table.sh @@ -7,7 +7,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh -${CLICKHOUSE_CLIENT} --multiquery --query=" +${CLICKHOUSE_CLIENT} --query=" DROP TABLE IF EXISTS memory; CREATE TABLE memory (x UInt64) ENGINE = Memory; @@ -21,13 +21,13 @@ INSERT INTO memory SELECT * FROM numbers(1000);" # But if the table will be dropped before query - just pass. # It's Ok, because otherwise the test will depend on the race condition in the test itself. -${CLICKHOUSE_CLIENT} --multiquery --query=" +${CLICKHOUSE_CLIENT} --query=" SET max_threads = 1; SELECT count() FROM memory WHERE NOT ignore(sleep(0.0001));" 2>&1 | grep -c -P '^1000$|^0$|Exception' & sleep 0.05; -${CLICKHOUSE_CLIENT} --multiquery --query=" +${CLICKHOUSE_CLIENT} --query=" TRUNCATE TABLE memory; DROP TABLE memory; " diff --git a/tests/queries/0_stateless/00705_drop_create_merge_tree.sh b/tests/queries/0_stateless/00705_drop_create_merge_tree.sh index ea8b9d02e49..fd002668696 100755 --- a/tests/queries/0_stateless/00705_drop_create_merge_tree.sh +++ b/tests/queries/0_stateless/00705_drop_create_merge_tree.sh @@ -5,8 +5,8 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -yes 'CREATE TABLE IF NOT EXISTS table (x UInt8) ENGINE = MergeTree ORDER BY tuple();' | head -n 1000 | $CLICKHOUSE_CLIENT --multiquery & -yes 'DROP TABLE IF EXISTS table;' | head -n 1000 | $CLICKHOUSE_CLIENT --multiquery & +yes 'CREATE TABLE IF NOT EXISTS table (x UInt8) ENGINE = MergeTree ORDER BY tuple();' | head -n 1000 | $CLICKHOUSE_CLIENT & +yes 'DROP TABLE IF EXISTS table;' | head -n 1000 | $CLICKHOUSE_CLIENT & wait ${CLICKHOUSE_CLIENT} --query "DROP TABLE IF EXISTS table" diff --git a/tests/queries/0_stateless/00763_lock_buffer_long.sh b/tests/queries/0_stateless/00763_lock_buffer_long.sh index 444a66767aa..92f917aa287 100755 --- a/tests/queries/0_stateless/00763_lock_buffer_long.sh +++ b/tests/queries/0_stateless/00763_lock_buffer_long.sh @@ -16,12 +16,12 @@ ${CLICKHOUSE_CLIENT} --query="CREATE TABLE buffer_00763_2 (s String) ENGINE = Bu function thread1() { - seq 1 500 | sed -r -e 's/.+/DROP TABLE IF EXISTS mt_00763_2; CREATE TABLE mt_00763_2 (s String) ENGINE = MergeTree ORDER BY s; INSERT INTO mt_00763_2 SELECT toString(number) FROM numbers(10);/' | ${CLICKHOUSE_CLIENT} --fsync-metadata 0 --multiquery --ignore-error ||: + seq 1 500 | sed -r -e 's/.+/DROP TABLE IF EXISTS mt_00763_2; CREATE TABLE mt_00763_2 (s String) ENGINE = MergeTree ORDER BY s; INSERT INTO mt_00763_2 SELECT toString(number) FROM numbers(10);/' | ${CLICKHOUSE_CLIENT} --fsync-metadata 0 --ignore-error ||: } function thread2() { - seq 1 500 | sed -r -e 's/.+/SELECT count() FROM buffer_00763_2;/' | ${CLICKHOUSE_CLIENT} --multiquery --server_logs_file='/dev/null' --ignore-error 2>&1 | grep -vP '^0$|^10$|^Received exception|^Code: 60|^Code: 218|^Code: 473' | grep -v '(query: ' + seq 1 500 | sed -r -e 's/.+/SELECT count() FROM buffer_00763_2;/' | ${CLICKHOUSE_CLIENT} --server_logs_file='/dev/null' --ignore-error 2>&1 | grep -vP '^0$|^10$|^Received exception|^Code: 60|^Code: 218|^Code: 473' | grep -v '(query: ' } thread1 & diff --git a/tests/queries/0_stateless/00763_long_lock_buffer_alter_destination_table.sh b/tests/queries/0_stateless/00763_long_lock_buffer_alter_destination_table.sh index 7e2384cfc52..79df667d45f 100755 --- a/tests/queries/0_stateless/00763_long_lock_buffer_alter_destination_table.sh +++ b/tests/queries/0_stateless/00763_long_lock_buffer_alter_destination_table.sh @@ -18,12 +18,12 @@ ${CLICKHOUSE_CLIENT} --query="INSERT INTO mt_00763_1 VALUES 
(1, '1'), (2, '2'), function thread1() { - seq 1 300 | sed -r -e 's/.+/ALTER TABLE mt_00763_1 MODIFY column s UInt32; ALTER TABLE mt_00763_1 MODIFY column s String;/' | ${CLICKHOUSE_CLIENT} --multiquery --ignore-error ||: + seq 1 300 | sed -r -e 's/.+/ALTER TABLE mt_00763_1 MODIFY column s UInt32; ALTER TABLE mt_00763_1 MODIFY column s String;/' | ${CLICKHOUSE_CLIENT} --ignore-error ||: } function thread2() { - seq 1 2000 | sed -r -e 's/.+/SELECT sum(length(s)) FROM buffer_00763_1;/' | ${CLICKHOUSE_CLIENT} --multiquery --ignore-error 2>&1 | grep -vP '(^3$|^Received exception from server|^Code: 473)' + seq 1 2000 | sed -r -e 's/.+/SELECT sum(length(s)) FROM buffer_00763_1;/' | ${CLICKHOUSE_CLIENT} --ignore-error 2>&1 | grep -vP '(^3$|^Received exception from server|^Code: 473)' } thread1 & diff --git a/tests/queries/0_stateless/00825_protobuf_format_array_3dim.sh b/tests/queries/0_stateless/00825_protobuf_format_array_3dim.sh index 3cd842a10ba..468ced802cd 100755 --- a/tests/queries/0_stateless/00825_protobuf_format_array_3dim.sh +++ b/tests/queries/0_stateless/00825_protobuf_format_array_3dim.sh @@ -9,7 +9,7 @@ SCHEMADIR=$CURDIR/format_schemas set -eo pipefail # Run the client. -$CLICKHOUSE_CLIENT --multiquery < /dev/null < /dev/null < $row_format_file -$CLICKHOUSE_CLIENT --multiline --multiquery --query "SELECT * FROM template GROUP BY question, answer, likes, date WITH TOTALS ORDER BY date LIMIT 3 FORMAT Template SETTINGS \ +$CLICKHOUSE_CLIENT --multiline --query "SELECT * FROM template GROUP BY question, answer, likes, date WITH TOTALS ORDER BY date LIMIT 3 FORMAT Template SETTINGS \ format_template_row = '$row_format_file', \ format_template_row_format = 'Question: \${question:Quoted}, Answer: \${answer:Quoted}, Number of Likes: \${likes:Raw}, Date: \${date:Raw}', \ format_template_rows_between_delimiter = ';\n'; --{clientError 474}" @@ -38,7 +38,7 @@ format_template_rows_between_delimiter = ';\n'"; # Test that if both format_template_result_format setting and format_template_resultset are provided, error is thrown resultset_output_file="$CURDIR"/"$CLICKHOUSE_TEST_UNIQUE_NAME"_template_output_format_resultset.tmp echo -ne '===== Resultset ===== \n \${data} \n ===============' > $resultset_output_file -$CLICKHOUSE_CLIENT --multiline --multiquery --query "SELECT * FROM template GROUP BY question, answer, likes, date WITH TOTALS ORDER BY date LIMIT 3 FORMAT Template SETTINGS \ +$CLICKHOUSE_CLIENT --multiline --query "SELECT * FROM template GROUP BY question, answer, likes, date WITH TOTALS ORDER BY date LIMIT 3 FORMAT Template SETTINGS \ format_template_resultset = '$resultset_output_file', \ format_template_resultset_format = '===== Resultset ===== \n \${data} \n ===============', \ format_template_row_format = 'Question: \${question:Quoted}, Answer: \${answer:Quoted}, Number of Likes: \${likes:Raw}, Date: \${date:Raw}', \ diff --git a/tests/queries/0_stateless/00956_sensitive_data_masking.sh b/tests/queries/0_stateless/00956_sensitive_data_masking.sh index 926557e4ba6..bd65b937648 100755 --- a/tests/queries/0_stateless/00956_sensitive_data_masking.sh +++ b/tests/queries/0_stateless/00956_sensitive_data_masking.sh @@ -17,7 +17,7 @@ echo 1 # normal execution $CLICKHOUSE_CLIENT \ --query="SELECT 'find_me_TOPSECRET=TOPSECRET' FROM numbers(1) FORMAT Null" \ - --log_queries=1 --ignore-error --multiquery >"$tmp_file" 2>&1 + --log_queries=1 --ignore-error >"$tmp_file" 2>&1 grep -F 'find_me_[hidden]' "$tmp_file" >/dev/null || echo 'fail 1a' grep -F 'TOPSECRET' "$tmp_file" && echo 'fail 1b' @@ 
-38,7 +38,7 @@ echo 3 # failure at before query start $CLICKHOUSE_CLIENT \ --query="SELECT 1 FROM system.numbers WHERE credit_card_number='find_me_TOPSECRET=TOPSECRET' FORMAT Null" \ - --log_queries=1 --ignore-error --multiquery |& grep -v '^(query: ' > "$tmp_file" + --log_queries=1 --ignore-error |& grep -v '^(query: ' > "$tmp_file" grep -F 'find_me_[hidden]' "$tmp_file" >/dev/null || echo 'fail 3a' grep -F 'TOPSECRET' "$tmp_file" && echo 'fail 3b' @@ -56,7 +56,7 @@ echo 4 # failure at the end of query $CLICKHOUSE_CLIENT \ --query="SELECT 'find_me_TOPSECRET=TOPSECRET', intDiv( 100, number - 10) FROM numbers(11) FORMAT Null" \ - --log_queries=1 --ignore-error --max_block_size=2 --multiquery |& grep -v '^(query: ' > "$tmp_file" + --log_queries=1 --ignore-error --max_block_size=2 |& grep -v '^(query: ' > "$tmp_file" grep -F 'find_me_[hidden]' "$tmp_file" >/dev/null || echo 'fail 4a' grep -F 'TOPSECRET' "$tmp_file" && echo 'fail 4b' @@ -67,7 +67,7 @@ rm -f "$tmp_file2" >/dev/null 2>&1 bash -c "$CLICKHOUSE_CLIENT \ --function_sleep_max_microseconds_per_block 60000000 \ --query=\"select sleepEachRow(1) from numbers(10) where ignore('find_me_TOPSECRET=TOPSECRET')=0 and ignore('fwerkh_that_magic_string_make_me_unique') = 0 FORMAT Null\" \ - --log_queries=1 --ignore-error --multiquery |& grep -v '^(query: ' > $tmp_file2" & + --log_queries=1 --ignore-error |& grep -v '^(query: ' > $tmp_file2" & rm -f "$tmp_file" >/dev/null 2>&1 # check that executing query doesn't expose secrets in processlist @@ -133,7 +133,7 @@ insert into sensitive select number as id, toDate('2019-01-01') as date, 'abcd' insert into sensitive select number as id, toDate('2019-01-01') as date, 'find_me_TOPSECRET=TOPSECRET' as value1, rand() as valuer from numbers(10); insert into sensitive select number as id, toDate('2019-01-01') as date, 'abcd' as value1, rand() as valuer from numbers(10000); select * from sensitive WHERE value1 = 'find_me_TOPSECRET=TOPSECRET' FORMAT Null; -drop table sensitive;" --log_queries=1 --ignore-error --multiquery >"$tmp_file" 2>&1 +drop table sensitive;" --log_queries=1 --ignore-error >"$tmp_file" 2>&1 grep -F 'find_me_[hidden]' "$tmp_file" >/dev/null || echo 'fail 8a' grep -F 'TOPSECRET' "$tmp_file" && echo 'fail 8b' @@ -144,7 +144,7 @@ echo 9 $CLICKHOUSE_CLIENT \ --server_logs_file=/dev/null \ --query="SELECT if( count() > 0, 'text_log non empty', 'text_log empty') FROM system.text_log WHERE event_date >= yesterday() and message like '%find_me%'; - select * from system.text_log where event_date >= yesterday() and message like '%TOPSECRET=TOPSECRET%';" --ignore-error --multiquery + select * from system.text_log where event_date >= yesterday() and message like '%TOPSECRET=TOPSECRET%';" --ignore-error echo 'finish' rm -f "$tmp_file" >/dev/null 2>&1 diff --git a/tests/queries/0_stateless/01019_alter_materialized_view_atomic.sh b/tests/queries/0_stateless/01019_alter_materialized_view_atomic.sh index 4bd21fcee02..eb12a76eb62 100755 --- a/tests/queries/0_stateless/01019_alter_materialized_view_atomic.sh +++ b/tests/queries/0_stateless/01019_alter_materialized_view_atomic.sh @@ -7,7 +7,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh -$CLICKHOUSE_CLIENT --multiquery <&1| grep -Fa "Exception: " | grep -Fv UNKNOWN_STATUS_OF_TRANSACTION @@ -24,7 +24,7 @@ function begin_commit_readonly() function begin_rollback_readonly() { while true; do - $CLICKHOUSE_CLIENT --wait_changes_become_visible_after_commit_mode=wait_unknown --multiquery --query " + $CLICKHOUSE_CLIENT --wait_changes_become_visible_after_commit_mode=wait_unknown --query " BEGIN TRANSACTION; SET TRANSACTION SNAPSHOT 42; ROLLBACK;" @@ -34,7 +34,7 @@ function begin_rollback_readonly() function begin_insert_commit() { while true; do - $CLICKHOUSE_CLIENT --wait_changes_become_visible_after_commit_mode=async --multiquery --query " + $CLICKHOUSE_CLIENT --wait_changes_become_visible_after_commit_mode=async --query " BEGIN TRANSACTION; INSERT INTO mt VALUES ($RANDOM); COMMIT;" 2>&1| grep -Fa "Exception: " | grep -Fv UNKNOWN_STATUS_OF_TRANSACTION diff --git a/tests/queries/0_stateless/01169_alter_partition_isolation_stress.sh b/tests/queries/0_stateless/01169_alter_partition_isolation_stress.sh index d4884cbf457..8873fd88f0e 100755 --- a/tests/queries/0_stateless/01169_alter_partition_isolation_stress.sh +++ b/tests/queries/0_stateless/01169_alter_partition_isolation_stress.sh @@ -22,7 +22,7 @@ function thread_insert() set -eu val=1 while true; do - $CLICKHOUSE_CLIENT --multiquery --query " + $CLICKHOUSE_CLIENT --query " BEGIN TRANSACTION; INSERT INTO src VALUES /* ($val, 1) */ ($val, 1); INSERT INTO src VALUES /* ($val, 2) */ ($val, 2); @@ -210,7 +210,7 @@ function thread_select() set -eu while true; do output=$( - $CLICKHOUSE_CLIENT --multiquery --query " + $CLICKHOUSE_CLIENT --query " BEGIN TRANSACTION; -- no duplicates SELECT type, throwIf(count(n) != countDistinct(n)) FROM src GROUP BY type FORMAT Null; diff --git a/tests/queries/0_stateless/01169_old_alter_partition_isolation_stress.sh b/tests/queries/0_stateless/01169_old_alter_partition_isolation_stress.sh index 0d2016952d4..404042ab64e 100755 --- a/tests/queries/0_stateless/01169_old_alter_partition_isolation_stress.sh +++ b/tests/queries/0_stateless/01169_old_alter_partition_isolation_stress.sh @@ -19,7 +19,7 @@ function thread_insert() set -e val=1 while true; do - $CLICKHOUSE_CLIENT --multiquery --query " + $CLICKHOUSE_CLIENT --query " BEGIN TRANSACTION; INSERT INTO src VALUES /* ($val, 1) */ ($val, 1); INSERT INTO src VALUES /* ($val, 2) */ ($val, 2); @@ -40,7 +40,7 @@ function thread_partition_src_to_dst() sum=0 for i in {1..20}; do out=$( - $CLICKHOUSE_CLIENT --multiquery --query " + $CLICKHOUSE_CLIENT --query " BEGIN TRANSACTION; INSERT INTO src VALUES /* ($i, 3) */ ($i, 3); INSERT INTO dst SELECT * FROM src; @@ -49,7 +49,7 @@ function thread_partition_src_to_dst() SELECT throwIf((SELECT (count(), sum(n)) FROM merge(currentDatabase(), '') WHERE type=3) != ($count + 1, $sum + $i)) FORMAT Null; COMMIT;" 2>&1) ||: - echo "$out" | grep -Fv "SERIALIZATION_ERROR" | grep -F "Received from " && $CLICKHOUSE_CLIENT --multiquery --query " + echo "$out" | grep -Fv "SERIALIZATION_ERROR" | grep -F "Received from " && $CLICKHOUSE_CLIENT --query " begin transaction; set transaction snapshot 3; select $i, 'src', type, n, _part from src order by type, n; @@ -68,7 +68,7 @@ function thread_partition_dst_to_src() if (( i % 2 )); then action="COMMIT" fi - $CLICKHOUSE_CLIENT --multiquery --query " + $CLICKHOUSE_CLIENT --query " SYSTEM STOP MERGES dst; ALTER TABLE dst DROP PARTITION ID 'nonexistent'; -- STOP MERGES doesn't wait for started merges to finish, so we use this trick SYSTEM SYNC TRANSACTION 
LOG; @@ -87,7 +87,7 @@ function thread_select() { set -e while true; do - $CLICKHOUSE_CLIENT --multiquery --query " + $CLICKHOUSE_CLIENT --query " BEGIN TRANSACTION; -- no duplicates SELECT type, throwIf(count(n) != countDistinct(n)) FROM src GROUP BY type FORMAT Null; diff --git a/tests/queries/0_stateless/01171_mv_select_insert_isolation_long.sh b/tests/queries/0_stateless/01171_mv_select_insert_isolation_long.sh index 3b9bb50517d..2fb58e4cc57 100755 --- a/tests/queries/0_stateless/01171_mv_select_insert_isolation_long.sh +++ b/tests/queries/0_stateless/01171_mv_select_insert_isolation_long.sh @@ -65,7 +65,7 @@ function insert_commit_action() local tag=$1; shift # some transactions will fail due to constraint - $CLICKHOUSE_CLIENT --multiquery --query " + $CLICKHOUSE_CLIENT --query " BEGIN TRANSACTION; INSERT INTO src VALUES /* ($i, $tag) */ ($i, $tag); SELECT throwIf((SELECT sum(nm) FROM mv) != $(($i * $tag))) /* ($i, $tag) */ FORMAT Null; @@ -83,7 +83,7 @@ function insert_rollback_action() local i=$1; shift local tag=$1; shift - $CLICKHOUSE_CLIENT --multiquery --query " + $CLICKHOUSE_CLIENT --query " BEGIN TRANSACTION; INSERT INTO src VALUES /* (42, $tag) */ (42, $tag); SELECT throwIf((SELECT count() FROM src WHERE n=42 AND m=$tag) != 1) FORMAT Null; @@ -112,7 +112,7 @@ function optimize_action() action="ROLLBACK" fi - $CLICKHOUSE_CLIENT --multiquery --query " + $CLICKHOUSE_CLIENT --query " BEGIN TRANSACTION; $optimize_query; $action; @@ -126,7 +126,7 @@ function select_action() { set -e - $CLICKHOUSE_CLIENT --multiquery --query " + $CLICKHOUSE_CLIENT --query " BEGIN TRANSACTION; SELECT throwIf((SELECT (sum(n), count() % 2) FROM src) != (0, 1)) FORMAT Null; SELECT throwIf((SELECT (sum(nm), count() % 2) FROM mv) != (0, 1)) FORMAT Null; @@ -140,7 +140,7 @@ function select_insert_action() { set -e - $CLICKHOUSE_CLIENT --multiquery --query " + $CLICKHOUSE_CLIENT --query " BEGIN TRANSACTION; SELECT throwIf((SELECT count() FROM tmp) != 0) FORMAT Null; INSERT INTO tmp SELECT 1, n*m FROM src; @@ -199,7 +199,7 @@ wait $PID_8 || echo "second select_insert_action has failed with status $?" 
2>&1 wait_for_queries_to_finish $WAIT_FINISH -$CLICKHOUSE_CLIENT --multiquery --query " +$CLICKHOUSE_CLIENT --query " BEGIN TRANSACTION; SELECT throwIf((SELECT (sum(n), count() % 2) FROM src) != (0, 1)) FORMAT Null; SELECT throwIf((SELECT (sum(nm), count() % 2) FROM mv) != (0, 1)) FORMAT Null; @@ -209,7 +209,7 @@ $CLICKHOUSE_CLIENT --multiquery --query " COMMIT; " -$CLICKHOUSE_CLIENT --multiquery --query " +$CLICKHOUSE_CLIENT --query " SELECT throwIf((SELECT (sum(n), count() % 2) FROM src) != (0, 1)) FORMAT Null; SELECT throwIf((SELECT (sum(nm), count() % 2) FROM mv) != (0, 1)) FORMAT Null; SELECT throwIf((SELECT (sum(nm), count() % 2) FROM dst) != (0, 1)) FORMAT Null; diff --git a/tests/queries/0_stateless/01174_select_insert_isolation.sh b/tests/queries/0_stateless/01174_select_insert_isolation.sh index 6321f6ff01b..235d98fb5de 100755 --- a/tests/queries/0_stateless/01174_select_insert_isolation.sh +++ b/tests/queries/0_stateless/01174_select_insert_isolation.sh @@ -16,7 +16,7 @@ $CLICKHOUSE_CLIENT --query "CREATE TABLE mt (n Int8, m Int8) ENGINE=MergeTree OR function thread_insert_commit() { for i in {1..50}; do - $CLICKHOUSE_CLIENT --multiquery --query " + $CLICKHOUSE_CLIENT --query " BEGIN TRANSACTION; INSERT INTO mt VALUES /* ($i, $1) */ ($i, $1); INSERT INTO mt VALUES /* (-$i, $1) */ (-$i, $1); @@ -27,7 +27,7 @@ function thread_insert_commit() function thread_insert_rollback() { for _ in {1..50}; do - $CLICKHOUSE_CLIENT --multiquery --query " + $CLICKHOUSE_CLIENT --query " BEGIN TRANSACTION; INSERT INTO mt VALUES /* (42, $1) */ (42, $1); ROLLBACK;"; @@ -38,7 +38,7 @@ function thread_select() { while true; do # The first and the last queries must get the same result - $CLICKHOUSE_CLIENT --multiquery --query " + $CLICKHOUSE_CLIENT --query " BEGIN TRANSACTION; SET throw_on_unsupported_query_inside_transaction=0; CREATE TEMPORARY TABLE tmp AS SELECT arraySort(groupArray(n)), arraySort(groupArray(m)), arraySort(groupArray(_part)) FROM mt FORMAT Null; @@ -58,7 +58,7 @@ kill -TERM $PID_4 wait wait_for_queries_to_finish 40 -$CLICKHOUSE_CLIENT --multiquery --query " +$CLICKHOUSE_CLIENT --query " BEGIN TRANSACTION; SELECT count(), sum(n), sum(m=1), sum(m=2), sum(m=3) FROM mt;"; diff --git a/tests/queries/0_stateless/01198_client_quota_key.sh b/tests/queries/0_stateless/01198_client_quota_key.sh index 3f5f5df5071..d08aa2e364f 100755 --- a/tests/queries/0_stateless/01198_client_quota_key.sh +++ b/tests/queries/0_stateless/01198_client_quota_key.sh @@ -5,4 +5,4 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh -$CLICKHOUSE_CLIENT --quota_key Hello --query_id test_quota_key --log_queries 1 --multiquery --query "SELECT 1; SYSTEM FLUSH LOGS; SELECT DISTINCT quota_key FROM system.query_log WHERE current_database = currentDatabase() AND event_date >= yesterday() AND event_time >= now() - 300 AND query_id = 'test_quota_key'" +$CLICKHOUSE_CLIENT --quota_key Hello --query_id test_quota_key --log_queries 1 --query "SELECT 1; SYSTEM FLUSH LOGS; SELECT DISTINCT quota_key FROM system.query_log WHERE current_database = currentDatabase() AND event_date >= yesterday() AND event_time >= now() - 300 AND query_id = 'test_quota_key'" diff --git a/tests/queries/0_stateless/01285_engine_join_donmikel.sh b/tests/queries/0_stateless/01285_engine_join_donmikel.sh index 7522ed9924b..ce273ab8e0c 100755 --- a/tests/queries/0_stateless/01285_engine_join_donmikel.sh +++ b/tests/queries/0_stateless/01285_engine_join_donmikel.sh @@ -4,7 +4,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -$CLICKHOUSE_CLIENT --multiquery --query " +$CLICKHOUSE_CLIENT --query " DROP TABLE IF EXISTS NmSubj; DROP TABLE IF EXISTS events; @@ -60,7 +60,7 @@ FROM events as e INNER JOIN NmSubj as ns ON ns.NmId = toUInt32(e.Param1) WHERE e.EventDate = today() - 7 AND e.EventId = 'GCO' AND ns.SubjectId = 2073" -$CLICKHOUSE_CLIENT --multiquery --query " +$CLICKHOUSE_CLIENT --query " DROP TABLE NmSubj; DROP TABLE events; " diff --git a/tests/queries/0_stateless/01293_optimize_final_force.sh b/tests/queries/0_stateless/01293_optimize_final_force.sh index e838af8af9b..9c135d272e4 100755 --- a/tests/queries/0_stateless/01293_optimize_final_force.sh +++ b/tests/queries/0_stateless/01293_optimize_final_force.sh @@ -11,7 +11,7 @@ TIMELIMIT=31 while [ $SECONDS -lt "$TIMELIMIT" ] && [ $it -lt 100 ]; do it=$((it+1)) - $CLICKHOUSE_CLIENT --multiquery --query " + $CLICKHOUSE_CLIENT --query " DROP TABLE IF EXISTS mt; CREATE TABLE mt (x UInt8, k UInt8 DEFAULT 0) ENGINE = SummingMergeTree ORDER BY k; diff --git a/tests/queries/0_stateless/01304_direct_io_long.sh b/tests/queries/0_stateless/01304_direct_io_long.sh index 2e27c2f7728..1241f299d94 100755 --- a/tests/queries/0_stateless/01304_direct_io_long.sh +++ b/tests/queries/0_stateless/01304_direct_io_long.sh @@ -5,7 +5,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh -$CLICKHOUSE_CLIENT --multiquery --query " +$CLICKHOUSE_CLIENT --query " DROP TABLE IF EXISTS bug; CREATE TABLE bug (UserID UInt64, Date Date) ENGINE = MergeTree ORDER BY Date SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi', merge_max_block_size = 8192; @@ -18,5 +18,5 @@ cat "$LOG" | grep Loaded rm "$LOG" -$CLICKHOUSE_CLIENT --multiquery --query " +$CLICKHOUSE_CLIENT --query " DROP TABLE bug;" diff --git a/tests/queries/0_stateless/01375_storage_file_tsv_csv_with_names_write_prefix.sh b/tests/queries/0_stateless/01375_storage_file_tsv_csv_with_names_write_prefix.sh index af8d3f4e69b..a634f689dca 100755 --- a/tests/queries/0_stateless/01375_storage_file_tsv_csv_with_names_write_prefix.sh +++ b/tests/queries/0_stateless/01375_storage_file_tsv_csv_with_names_write_prefix.sh @@ -9,7 +9,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) echo 'zero rows' for format in TSVWithNames TSVWithNamesAndTypes CSVWithNames CSVWithNamesAndTypes JSONCompactEachRowWithNames JSONCompactEachRowWithNamesAndTypes JSONCompactStringsEachRow JSONCompactStringsEachRowWithNamesAndTypes; do echo $format - ${CLICKHOUSE_LOCAL} --multiquery --query=" + ${CLICKHOUSE_LOCAL} --query=" CREATE TABLE ${format}_01375 ENGINE File($format, '01375_$format') AS SELECT * FROM numbers(1) WHERE number < 0; SELECT * FROM ${format}_01375; DROP TABLE ${format}_01375; @@ -22,7 +22,7 @@ echo 'multi clickhouse-local one file' for format in TSVWithNames TSVWithNamesAndTypes CSVWithNames CSVWithNamesAndTypes JSONCompactEachRowWithNames JSONCompactEachRowWithNamesAndTypes JSONCompactStringsEachRow JSONCompactStringsEachRowWithNamesAndTypes; do echo $format for _ in {1..2}; do - ${CLICKHOUSE_LOCAL} --multiquery --query=" + ${CLICKHOUSE_LOCAL} --query=" CREATE TABLE ${format}_01375 ENGINE File($format, '01375_$format') AS SELECT * FROM numbers(1); SELECT * FROM ${format}_01375; DROP TABLE ${format}_01375; diff --git a/tests/queries/0_stateless/01443_merge_truncate_long.sh b/tests/queries/0_stateless/01443_merge_truncate_long.sh index 65b9bcd366e..51654b2e4e1 100755 --- a/tests/queries/0_stateless/01443_merge_truncate_long.sh +++ b/tests/queries/0_stateless/01443_merge_truncate_long.sh @@ -34,7 +34,7 @@ do SELECT count() FROM t HAVING count() > 0; SELECT ${i}; " -done | ${CLICKHOUSE_CLIENT} --multiquery +done | ${CLICKHOUSE_CLIENT} wait diff --git a/tests/queries/0_stateless/01527_clickhouse_local_optimize.sh b/tests/queries/0_stateless/01527_clickhouse_local_optimize.sh index d61d34244d9..c1d5c357308 100755 --- a/tests/queries/0_stateless/01527_clickhouse_local_optimize.sh +++ b/tests/queries/0_stateless/01527_clickhouse_local_optimize.sh @@ -10,6 +10,6 @@ rm -rf "${WORKING_FOLDER_01527}" mkdir -p "${WORKING_FOLDER_01527}" # OPTIMIZE was crashing due to lack of temporary volume in local -${CLICKHOUSE_LOCAL} --multiquery --query "drop database if exists d; create database d; create table d.t engine MergeTree order by a as select 1 a; optimize table d.t final" --path="${WORKING_FOLDER_01527}" +${CLICKHOUSE_LOCAL} --query "drop database if exists d; create database d; create table d.t engine MergeTree order by a as select 1 a; optimize table d.t final" --path="${WORKING_FOLDER_01527}" rm -rf "${WORKING_FOLDER_01527}" diff --git a/tests/queries/0_stateless/01543_avro_deserialization_with_lc.sh b/tests/queries/0_stateless/01543_avro_deserialization_with_lc.sh index a5697a62dc2..bc9efaedd5d 100755 --- a/tests/queries/0_stateless/01543_avro_deserialization_with_lc.sh +++ 
b/tests/queries/0_stateless/01543_avro_deserialization_with_lc.sh @@ -5,7 +5,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -$CLICKHOUSE_CLIENT --multiquery --query " +$CLICKHOUSE_CLIENT --query " SET allow_suspicious_low_cardinality_types=1; CREATE TABLE IF NOT EXISTS test_01543 (value LowCardinality(String), value2 LowCardinality(UInt64)) ENGINE=Memory(); " diff --git a/tests/queries/0_stateless/01544_file_engine_settings.sh b/tests/queries/0_stateless/01544_file_engine_settings.sh index b31754f9531..eb0a8a964d0 100755 --- a/tests/queries/0_stateless/01544_file_engine_settings.sh +++ b/tests/queries/0_stateless/01544_file_engine_settings.sh @@ -10,7 +10,7 @@ rm -f -- "$the_file" # We are going to check that format settings work for File engine, # by creating a table with a non-default delimiter, and reading from it. -${CLICKHOUSE_LOCAL} --multiquery --query " +${CLICKHOUSE_LOCAL} --query " create table t(a int, b int) engine File(CSV, '$the_file') settings format_csv_delimiter = '|'; insert into t select 1 a, 1 b; " @@ -18,7 +18,7 @@ ${CLICKHOUSE_LOCAL} --multiquery --query " # See what's in the file cat "$the_file" -${CLICKHOUSE_LOCAL} --multiquery --query " +${CLICKHOUSE_LOCAL} --query " create table t(a int, b int) engine File(CSV, '$the_file') settings format_csv_delimiter = '|'; select * from t; " diff --git a/tests/queries/0_stateless/01600_detach_permanently.sh b/tests/queries/0_stateless/01600_detach_permanently.sh index 6721dbf3015..679e9a749ee 100755 --- a/tests/queries/0_stateless/01600_detach_permanently.sh +++ b/tests/queries/0_stateless/01600_detach_permanently.sh @@ -18,7 +18,7 @@ mkdir -p "${WORKING_FOLDER_01600}" clickhouse_local() { local query="$1" shift - ${CLICKHOUSE_LOCAL} --allow_deprecated_database_ordinary=1 --multiquery --query "$query" "$@" --path="${WORKING_FOLDER_01600}" + ${CLICKHOUSE_LOCAL} --allow_deprecated_database_ordinary=1 --query "$query" "$@" --path="${WORKING_FOLDER_01600}" } test_detach_attach_sequence() { diff --git a/tests/queries/0_stateless/01600_parts_states_metrics_long.sh b/tests/queries/0_stateless/01600_parts_states_metrics_long.sh index 7215f270a4c..47b5a4dea13 100755 --- a/tests/queries/0_stateless/01600_parts_states_metrics_long.sh +++ b/tests/queries/0_stateless/01600_parts_states_metrics_long.sh @@ -24,7 +24,7 @@ verify() if [[ $i -eq 5000 ]] then - $CLICKHOUSE_CLIENT --multiquery " + $CLICKHOUSE_CLIENT " SELECT sumIf(value, metric = 'PartsActive'), sumIf(value, metric = 'PartsOutdated') FROM system.metrics; SELECT sum(active), sum(NOT active) FROM system.parts; SELECT sum(active), sum(NOT active) FROM system.projection_parts; diff --git a/tests/queries/0_stateless/01606_git_import.sh b/tests/queries/0_stateless/01606_git_import.sh index 48558d79f93..6986d6b14cf 100755 --- a/tests/queries/0_stateless/01606_git_import.sh +++ b/tests/queries/0_stateless/01606_git_import.sh @@ -19,7 +19,7 @@ done ${CLICKHOUSE_GIT_IMPORT} 2>&1 | wc -l -${CLICKHOUSE_CLIENT} --multiline --multiquery --query " +${CLICKHOUSE_CLIENT} --multiline --query " DROP TABLE IF EXISTS commits; DROP TABLE IF EXISTS file_changes; @@ -122,7 +122,7 @@ ${CLICKHOUSE_CLIENT} --query "SELECT count() FROM commits" ${CLICKHOUSE_CLIENT} --query "SELECT count() FROM file_changes" ${CLICKHOUSE_CLIENT} --query "SELECT count(), round(avg(indent), 1) FROM line_changes" -${CLICKHOUSE_CLIENT} --multiline --multiquery --query " +${CLICKHOUSE_CLIENT} --multiline --query " DROP TABLE commits; DROP TABLE 
file_changes; DROP TABLE line_changes; diff --git a/tests/queries/0_stateless/01607_arrays_as_nested_csv.sh b/tests/queries/0_stateless/01607_arrays_as_nested_csv.sh index 946be7fb4af..2a1182c14c1 100755 --- a/tests/queries/0_stateless/01607_arrays_as_nested_csv.sh +++ b/tests/queries/0_stateless/01607_arrays_as_nested_csv.sh @@ -4,7 +4,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -${CLICKHOUSE_CLIENT} --multiquery --query " +${CLICKHOUSE_CLIENT} --query " DROP TABLE IF EXISTS test; CREATE TABLE test (a Array(String)) ENGINE = Memory; " @@ -22,7 +22,7 @@ ${CLICKHOUSE_CLIENT} --input_format_csv_arrays_as_nested_csv 1 --query "INSERT I """Hello"", ""world"", ""42"""" TV""" END -${CLICKHOUSE_CLIENT} --multiquery --query " +${CLICKHOUSE_CLIENT} --query " SELECT * FROM test; DROP TABLE IF EXISTS test; " diff --git a/tests/queries/0_stateless/01632_tinylog_read_write.sh b/tests/queries/0_stateless/01632_tinylog_read_write.sh index 10625ec5d27..68d28b080e9 100755 --- a/tests/queries/0_stateless/01632_tinylog_read_write.sh +++ b/tests/queries/0_stateless/01632_tinylog_read_write.sh @@ -8,7 +8,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . "$CURDIR"/../shell_config.sh -$CLICKHOUSE_CLIENT --multiquery --query "DROP TABLE IF EXISTS test; CREATE TABLE IF NOT EXISTS test (x UInt64, s Array(Nullable(String))) ENGINE = TinyLog;" +$CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS test; CREATE TABLE IF NOT EXISTS test (x UInt64, s Array(Nullable(String))) ENGINE = TinyLog;" function thread_select { local TIMELIMIT=$((SECONDS+$1)) @@ -47,4 +47,4 @@ thread_insert $TIMEOUT & wait echo "Done" -$CLICKHOUSE_CLIENT --multiquery --query "DROP TABLE IF EXISTS test;" +$CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS test;" diff --git a/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh b/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh index 685fe69642a..ceb6aa060ea 100755 --- a/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh +++ b/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh @@ -24,7 +24,7 @@ ${CLICKHOUSE_CLIENT} --query "select file('a.txt'), file('b.txt');";echo ":"$? ${CLICKHOUSE_CLIENT} --query "insert into data select file('a.txt'), file('b.txt');";echo ":"$? ${CLICKHOUSE_CLIENT} --query "insert into data select file('a.txt'), file('b.txt');";echo ":"$? ${CLICKHOUSE_CLIENT} --query "select file('c.txt'), * from data";echo ":"$? 
-${CLICKHOUSE_CLIENT} --multiquery --query " +${CLICKHOUSE_CLIENT} --query " create table filenames(name String) engine=MergeTree() order by tuple(); insert into filenames values ('a.txt'), ('b.txt'), ('c.txt'); select file(name) from filenames format TSV; @@ -56,7 +56,7 @@ echo $c_count # Valid cases: # The default dir is the CWD path in LOCAL mode -${CLICKHOUSE_LOCAL} --multiquery --query " +${CLICKHOUSE_LOCAL} --query " drop table if exists data; create table data (A String, B String) engine=MergeTree() order by A; select file('a.txt'), file('b.txt'); diff --git a/tests/queries/0_stateless/01666_merge_tree_max_query_limit.sh b/tests/queries/0_stateless/01666_merge_tree_max_query_limit.sh index e04c9515009..ec318db98bf 100755 --- a/tests/queries/0_stateless/01666_merge_tree_max_query_limit.sh +++ b/tests/queries/0_stateless/01666_merge_tree_max_query_limit.sh @@ -8,7 +8,7 @@ function wait_for_query_to_start() { while [[ $($CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL" -d "SELECT sum(read_rows) FROM system.processes WHERE query_id = '$1'") == 0 ]]; do sleep 0.1; done } -${CLICKHOUSE_CLIENT} --multiline --multiquery --query " +${CLICKHOUSE_CLIENT} --multiline --query " drop table if exists simple; create table simple (i int, j int) engine = MergeTree order by i diff --git a/tests/queries/0_stateless/01747_system_session_log_long.sh b/tests/queries/0_stateless/01747_system_session_log_long.sh index 022bf488886..07055f96782 100755 --- a/tests/queries/0_stateless/01747_system_session_log_long.sh +++ b/tests/queries/0_stateless/01747_system_session_log_long.sh @@ -82,7 +82,7 @@ trap "cleanup" EXIT function executeQueryExpectError() { cat - > "${TMP_QUERY_FILE}" - ! ${CLICKHOUSE_CLIENT} --multiquery --queries-file "${TMP_QUERY_FILE}" "${@}" 2>&1 | tee -a "${TMP_QUERY_FILE}" + ! ${CLICKHOUSE_CLIENT} --queries-file "${TMP_QUERY_FILE}" "${@}" 2>&1 | tee -a "${TMP_QUERY_FILE}" } function createUser() @@ -303,7 +303,7 @@ function runEndpointTests() if [[ -n "${setup_queries}" ]] then # echo "Executing setup queries: ${setup_queries}" - echo "${setup_queries}" | executeQuery --multiquery + echo "${setup_queries}" | executeQuery fi testTCP "${auth_type}" "${username}" "${password}" @@ -357,7 +357,7 @@ testAsUserIdentifiedBy "plaintext_password" testAsUserIdentifiedBy "sha256_password" testAsUserIdentifiedBy "double_sha1_password" -executeQuery --multiquery <= 1000000 ? 1 : time FROM system.query_log WHERE current_database = currentDatabase() AND query_kind = 'Insert' AND event_date >= yesterday() AND type = 2 ORDER BY event_time DESC LIMIT 1;" diff --git a/tests/queries/0_stateless/01939_network_receive_bytes_metrics.sh b/tests/queries/0_stateless/01939_network_receive_bytes_metrics.sh index 03babad40f3..b2335a0365b 100755 --- a/tests/queries/0_stateless/01939_network_receive_bytes_metrics.sh +++ b/tests/queries/0_stateless/01939_network_receive_bytes_metrics.sh @@ -4,11 +4,11 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -${CLICKHOUSE_CLIENT} --multiquery --query "DROP TABLE IF EXISTS t; CREATE TABLE t (x UInt64) ENGINE = Memory;" +${CLICKHOUSE_CLIENT} --query "DROP TABLE IF EXISTS t; CREATE TABLE t (x UInt64) ENGINE = Memory;" seq 1 1000 | ${CLICKHOUSE_CLIENT} --query "INSERT INTO t FORMAT TSV" -${CLICKHOUSE_CLIENT} --multiquery --query "SYSTEM FLUSH LOGS; +${CLICKHOUSE_CLIENT} --query "SYSTEM FLUSH LOGS; WITH ProfileEvents['NetworkReceiveBytes'] AS bytes SELECT bytes >= 8000 AND bytes < 9000 ? 
1 : bytes FROM system.query_log WHERE current_database = currentDatabase() AND query_kind = 'Insert' AND event_date >= yesterday() AND type = 2 ORDER BY event_time DESC LIMIT 1;" diff --git a/tests/queries/0_stateless/01946_test_wrong_host_name_access.sh b/tests/queries/0_stateless/01946_test_wrong_host_name_access.sh index a00f307673e..ed2828c3f54 100755 --- a/tests/queries/0_stateless/01946_test_wrong_host_name_access.sh +++ b/tests/queries/0_stateless/01946_test_wrong_host_name_access.sh @@ -7,7 +7,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -${CLICKHOUSE_CLIENT} --multiquery --query " +${CLICKHOUSE_CLIENT} --query " DROP USER IF EXISTS dns_fail_1, dns_fail_2; CREATE USER dns_fail_1 HOST NAME 'non.existing.host.name', '${MYHOSTNAME}'; CREATE USER dns_fail_2 HOST NAME '${MYHOSTNAME}', 'non.existing.host.name';" diff --git a/tests/queries/0_stateless/01946_test_zstd_decompression_with_escape_sequence_at_the_end_of_buffer.sh b/tests/queries/0_stateless/01946_test_zstd_decompression_with_escape_sequence_at_the_end_of_buffer.sh index 0aedef028a2..b3748581f4f 100755 --- a/tests/queries/0_stateless/01946_test_zstd_decompression_with_escape_sequence_at_the_end_of_buffer.sh +++ b/tests/queries/0_stateless/01946_test_zstd_decompression_with_escape_sequence_at_the_end_of_buffer.sh @@ -9,7 +9,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) mkdir -p ${USER_FILES_PATH}/ cp $CUR_DIR/data_zstd/test_01946.zstd ${USER_FILES_PATH}/ -${CLICKHOUSE_CLIENT} --multiline --multiquery --query " +${CLICKHOUSE_CLIENT} --multiline --query " set min_chunk_bytes_for_parallel_parsing=10485760; set max_read_buffer_size = 65536; set input_format_parallel_parsing = 0; diff --git a/tests/queries/0_stateless/02009_from_infile.sh b/tests/queries/0_stateless/02009_from_infile.sh index 6a31aa4ac55..578ac14f558 100755 --- a/tests/queries/0_stateless/02009_from_infile.sh +++ b/tests/queries/0_stateless/02009_from_infile.sh @@ -19,7 +19,7 @@ ${CLICKHOUSE_CLIENT} --query "INSERT INTO test_infile FROM INFILE '${CLICKHOUSE_ ${CLICKHOUSE_CLIENT} --query "SELECT * FROM test_infile;" # if it not fails, select will print information -${CLICKHOUSE_LOCAL} --multiquery --query "CREATE TABLE test_infile (word String) ENGINE=Memory(); INSERT INTO test_infile FROM INFILE '${CLICKHOUSE_TMP}/test_infile.gz' FORMAT CSV; SELECT * from test_infile;" +${CLICKHOUSE_LOCAL} --query "CREATE TABLE test_infile (word String) ENGINE=Memory(); INSERT INTO test_infile FROM INFILE '${CLICKHOUSE_TMP}/test_infile.gz' FORMAT CSV; SELECT * from test_infile;" ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&query=DROP+TABLE" -d 'IF EXISTS test_infile_url' ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&query=CREATE" -d 'TABLE test_infile_url (x String) ENGINE = Memory' diff --git a/tests/queries/0_stateless/02024_compression_in_query.sh b/tests/queries/0_stateless/02024_compression_in_query.sh index caa74523bd7..2936568c991 100755 --- a/tests/queries/0_stateless/02024_compression_in_query.sh +++ b/tests/queries/0_stateless/02024_compression_in_query.sh @@ -55,8 +55,8 @@ ${CLICKHOUSE_CLIENT} --query "DROP TABLE IF EXISTS test_compression_keyword;" [ -e "${CLICKHOUSE_TMP}"/test_comp_for_input_and_output_without_gz.gz ] && rm "${CLICKHOUSE_TMP}"/test_comp_for_input_and_output_without_gz.gz # create files using compression method and without it to check that both queries work correct -${CLICKHOUSE_LOCAL} --multiquery --query "SELECT * FROM (SELECT 'Hello, World! 
From local.') INTO OUTFILE '${CLICKHOUSE_TMP}/test_comp_for_input_and_output.gz' FORMAT TabSeparated;" -${CLICKHOUSE_LOCAL} --multiquery --query "SELECT * FROM (SELECT 'Hello, World! From local.') INTO OUTFILE '${CLICKHOUSE_TMP}/test_comp_for_input_and_output_without_gz' COMPRESSION 'GZ' FORMAT TabSeparated;" +${CLICKHOUSE_LOCAL} --query "SELECT * FROM (SELECT 'Hello, World! From local.') INTO OUTFILE '${CLICKHOUSE_TMP}/test_comp_for_input_and_output.gz' FORMAT TabSeparated;" +${CLICKHOUSE_LOCAL} --query "SELECT * FROM (SELECT 'Hello, World! From local.') INTO OUTFILE '${CLICKHOUSE_TMP}/test_comp_for_input_and_output_without_gz' COMPRESSION 'GZ' FORMAT TabSeparated;" # check content of files cp ${CLICKHOUSE_TMP}/test_comp_for_input_and_output.gz ${CLICKHOUSE_TMP}/test_comp_for_input_and_output_to_decomp.gz @@ -68,7 +68,7 @@ gunzip ${CLICKHOUSE_TMP}/test_comp_for_input_and_output_without_gz_to_decomp.gz cat ${CLICKHOUSE_TMP}/test_comp_for_input_and_output_without_gz_to_decomp # create table to check inserts -${CLICKHOUSE_LOCAL} --multiquery --query " +${CLICKHOUSE_LOCAL} --query " DROP TABLE IF EXISTS test_compression_keyword; CREATE TABLE test_compression_keyword (text String) Engine=Memory; INSERT INTO TABLE test_compression_keyword FROM INFILE '${CLICKHOUSE_TMP}/test_comp_for_input_and_output.gz' FORMAT TabSeparated; diff --git a/tests/queries/0_stateless/02048_parallel_reading_from_infile.sh b/tests/queries/0_stateless/02048_parallel_reading_from_infile.sh index f055ea304b2..efc19cad054 100755 --- a/tests/queries/0_stateless/02048_parallel_reading_from_infile.sh +++ b/tests/queries/0_stateless/02048_parallel_reading_from_infile.sh @@ -17,7 +17,7 @@ echo -e "103" > "${CLICKHOUSE_TMP}"/test_infile_parallel_3 gzip "${CLICKHOUSE_TMP}"/test_infile_parallel -${CLICKHOUSE_CLIENT} --multiquery <&1 | grep -q "27" && echo "Correct" || echo 'Fail' -${CLICKHOUSE_LOCAL} --multiquery <&1 | grep 'AlterCommand' + $CLICKHOUSE_CLIENT --readonly 1 2>&1 | grep 'AlterCommand' diff --git a/tests/queries/0_stateless/02206_clickhouse_local_use_database.sh b/tests/queries/0_stateless/02206_clickhouse_local_use_database.sh index 59ede739e4a..3b71c8754c9 100755 --- a/tests/queries/0_stateless/02206_clickhouse_local_use_database.sh +++ b/tests/queries/0_stateless/02206_clickhouse_local_use_database.sh @@ -5,7 +5,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . 
"$CURDIR"/../shell_config.sh -$CLICKHOUSE_LOCAL --echo --multiline --multiquery -q """ +$CLICKHOUSE_LOCAL --echo --multiline -q """ SHOW TABLES; CREATE DATABASE test1; CREATE TABLE test1.table1 (a Int32) ENGINE=Memory; diff --git a/tests/queries/0_stateless/02226_filesystem_cache_profile_events.sh b/tests/queries/0_stateless/02226_filesystem_cache_profile_events.sh index d0e61541b15..18ae2d7b4b3 100755 --- a/tests/queries/0_stateless/02226_filesystem_cache_profile_events.sh +++ b/tests/queries/0_stateless/02226_filesystem_cache_profile_events.sh @@ -10,7 +10,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) for STORAGE_POLICY in 's3_cache' 'local_cache' 'azure_cache'; do echo "Using storage policy: $STORAGE_POLICY" - $CLICKHOUSE_CLIENT --multiquery --multiline --query """ + $CLICKHOUSE_CLIENT --multiline --query """ SET max_memory_usage='20G'; SET enable_filesystem_cache_on_write_operations = 0; @@ -25,7 +25,7 @@ for STORAGE_POLICY in 's3_cache' 'local_cache' 'azure_cache'; do query_id=$($CLICKHOUSE_CLIENT --query "select queryID() from ($query) limit 1" 2>&1) - $CLICKHOUSE_CLIENT --multiquery --multiline --query """ + $CLICKHOUSE_CLIENT --multiline --query """ SYSTEM FLUSH LOGS; SELECT ProfileEvents['CachedReadBufferReadFromCacheHits'] > 0 as remote_fs_cache_hit, ProfileEvents['CachedReadBufferReadFromCacheMisses'] > 0 as remote_fs_cache_miss, @@ -40,14 +40,14 @@ for STORAGE_POLICY in 's3_cache' 'local_cache' 'azure_cache'; do LIMIT 1; """ - $CLICKHOUSE_CLIENT --multiquery --multiline --query """ + $CLICKHOUSE_CLIENT --multiline --query """ set remote_filesystem_read_method = 'read'; set local_filesystem_read_method = 'pread'; """ query_id=$($CLICKHOUSE_CLIENT --query "select queryID() from ($query) limit 1" 2>&1) - $CLICKHOUSE_CLIENT --multiquery --multiline --query """ + $CLICKHOUSE_CLIENT --multiline --query """ SYSTEM FLUSH LOGS; SELECT ProfileEvents['CachedReadBufferReadFromCacheHits'] > 0 as remote_fs_cache_hit, ProfileEvents['CachedReadBufferReadFromCacheMisses'] > 0 as remote_fs_cache_miss, @@ -63,13 +63,13 @@ for STORAGE_POLICY in 's3_cache' 'local_cache' 'azure_cache'; do """ - $CLICKHOUSE_CLIENT --multiquery --multiline --query """ + $CLICKHOUSE_CLIENT --multiline --query """ set remote_filesystem_read_method='threadpool'; """ query_id=$($CLICKHOUSE_CLIENT --query "select queryID() from ($query) limit 1") - $CLICKHOUSE_CLIENT --multiquery --multiline --query """ + $CLICKHOUSE_CLIENT --multiline --query """ SYSTEM FLUSH LOGS; SELECT ProfileEvents['CachedReadBufferReadFromCacheHits'] > 0 as remote_fs_cache_hit, ProfileEvents['CachedReadBufferReadFromCacheMisses'] > 0 as remote_fs_cache_miss, @@ -84,7 +84,7 @@ for STORAGE_POLICY in 's3_cache' 'local_cache' 'azure_cache'; do LIMIT 1; """ - $CLICKHOUSE_CLIENT --multiquery --multiline --query """ + $CLICKHOUSE_CLIENT --multiline --query """ SELECT * FROM test_02226 WHERE value LIKE '%abc%' ORDER BY value LIMIT 10 FORMAT Null; SET enable_filesystem_cache_on_write_operations = 1; diff --git a/tests/queries/0_stateless/02227_test_create_empty_sqlite_db.sh b/tests/queries/0_stateless/02227_test_create_empty_sqlite_db.sh index 344452767cc..a3fe5f19de0 100755 --- a/tests/queries/0_stateless/02227_test_create_empty_sqlite_db.sh +++ b/tests/queries/0_stateless/02227_test_create_empty_sqlite_db.sh @@ -17,7 +17,7 @@ export CURR_DATABASE="test_01889_sqllite_${CLICKHOUSE_DATABASE}" DB_PATH=${USER_FILES_PATH}/${CURR_DATABASE}_db1 -${CLICKHOUSE_CLIENT} --multiquery --multiline --query=""" +${CLICKHOUSE_CLIENT} --multiline --query=""" DROP 
DATABASE IF EXISTS ${CURR_DATABASE}; CREATE DATABASE ${CURR_DATABASE} ENGINE = SQLite('${DB_PATH}'); SHOW TABLES FROM ${CURR_DATABASE}; @@ -25,6 +25,6 @@ SHOW TABLES FROM ${CURR_DATABASE}; sqlite3 "${DB_PATH}" 'CREATE TABLE table1 (col1 text, col2 smallint);' -${CLICKHOUSE_CLIENT} --multiquery --multiline --query=""" +${CLICKHOUSE_CLIENT} --multiline --query=""" SHOW TABLES FROM ${CURR_DATABASE}; """ diff --git a/tests/queries/0_stateless/02235_remote_fs_cache_stress.sh b/tests/queries/0_stateless/02235_remote_fs_cache_stress.sh index ffc38c0c1bd..aa5db33417c 100755 --- a/tests/queries/0_stateless/02235_remote_fs_cache_stress.sh +++ b/tests/queries/0_stateless/02235_remote_fs_cache_stress.sh @@ -6,7 +6,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . "$CUR_DIR"/../shell_config.sh -${CLICKHOUSE_CLIENT} --allow_suspicious_low_cardinality_types=1 --multiquery --multiline --query=""" +${CLICKHOUSE_CLIENT} --allow_suspicious_low_cardinality_types=1 --multiline --query=""" DROP TABLE IF EXISTS t_01411; DROP TABLE IF EXISTS t_01411_num; @@ -35,7 +35,7 @@ insert into lc_dict_reading select number, if(number < 8192 * 4, number % 100, n function go() { -${CLICKHOUSE_CLIENT} --multiquery --multiline --query=""" +${CLICKHOUSE_CLIENT} --multiline --query=""" select sum(toUInt64(str)), sum(toUInt64(pat)) from lc_dict_reading where val < 8129 or val > 8192 * 4; @@ -67,7 +67,7 @@ for _ in `seq 1 32`; do go | grep -q "Exception" && echo 'FAIL' || echo 'OK' ||: wait -${CLICKHOUSE_CLIENT} --multiquery --multiline --query=""" +${CLICKHOUSE_CLIENT} --multiline --query=""" DROP TABLE IF EXISTS t_01411; DROP TABLE IF EXISTS t_01411_num; """ diff --git a/tests/queries/0_stateless/02240_protobuflist_format_persons.sh b/tests/queries/0_stateless/02240_protobuflist_format_persons.sh index 637e01b9e63..e5e717d00a8 100755 --- a/tests/queries/0_stateless/02240_protobuflist_format_persons.sh +++ b/tests/queries/0_stateless/02240_protobuflist_format_persons.sh @@ -15,7 +15,7 @@ SCHEMADIR=$CURDIR/format_schemas set -eo pipefail # Run the client. 
-$CLICKHOUSE_CLIENT --multiquery <&1 | rg -Fc "'w' character" +$CLICKHOUSE_LOCAL <&1 | rg -Fc "'w' character" SELECT * FROM format(JSONEachRow, 'x Bool', '{"x": wtf}'); END -$CLICKHOUSE_LOCAL --multiquery <&1 | rg -Fc "expected 'false'" +$CLICKHOUSE_LOCAL <&1 | rg -Fc "expected 'false'" SELECT * FROM format(JSONEachRow, 'x Bool', '{"x": ftw}'); END -$CLICKHOUSE_LOCAL --multiquery <&1 | rg -Fc "'{' character" +$CLICKHOUSE_LOCAL <&1 | rg -Fc "'{' character" SELECT * FROM format(JSONEachRow, 'x Bool', '{"x": {}}'); END diff --git a/tests/queries/0_stateless/02722_database_filesystem.sh b/tests/queries/0_stateless/02722_database_filesystem.sh index 2d0ff256c95..fa23d847d90 100755 --- a/tests/queries/0_stateless/02722_database_filesystem.sh +++ b/tests/queries/0_stateless/02722_database_filesystem.sh @@ -30,7 +30,7 @@ cp ${user_files_tmp_dir}/tmp_numbers_1.csv ${user_files_tmp_dir}/tmp/tmp_numbers ################# echo "Test 1: create filesystem database and check implicit calls" -${CLICKHOUSE_CLIENT} --multiline --multiquery -q """ +${CLICKHOUSE_CLIENT} --multiline -q """ DROP DATABASE IF EXISTS test1; CREATE DATABASE test1 ENGINE = Filesystem; """ @@ -57,20 +57,20 @@ ${CLICKHOUSE_CLIENT} --query "SELECT COUNT(*) FROM test1.\`/tmp/tmp.csv\`;" 2>&1 ${CLICKHOUSE_CLIENT} --query "SELECT COUNT(*) FROM test1.\`../*/tmp_numbers_*.csv\`;" 2>&1 | tr '\n' ' ' | grep -oF "PATH_ACCESS_DENIED" > /dev/null && echo "OK" || echo 'FAIL' ||: ${CLICKHOUSE_CLIENT} --query "SELECT COUNT(*) FROM test1.\`../tmp_numbers_*.csv\`;" 2>&1 | tr '\n' ' ' | grep -oF "PATH_ACCESS_DENIED" > /dev/null && echo "OK" || echo 'FAIL' ||: ${CLICKHOUSE_CLIENT} --query "SELECT COUNT(*) FROM test1.\`../*.csv\`;" 2>&1 | tr '\n' ' ' | grep -oF "PATH_ACCESS_DENIED" > /dev/null && echo "OK" || echo 'FAIL' ||: -${CLICKHOUSE_CLIENT} --multiline --multiquery --query """ +${CLICKHOUSE_CLIENT} --multiline --query """ USE test1; SELECT COUNT(*) FROM \"../${tmp_dir}/tmp.csv\"; """ 2>&1 | tr '\n' ' ' | grep -oF "PATH_ACCESS_DENIED" > /dev/null && echo "OK" || echo 'FAIL' ||: ${CLICKHOUSE_CLIENT} --query "SELECT COUNT(*) FROM test1.\`../../../../../../tmp.csv\`;" 2>&1 | tr '\n' ' ' | grep -oF "PATH_ACCESS_DENIED" > /dev/null && echo "OK" || echo 'FAIL' ||: # BAD_ARGUMENTS: path should be inside user_files -${CLICKHOUSE_CLIENT} --multiline --multiquery -q """ +${CLICKHOUSE_CLIENT} --multiline -q """ DROP DATABASE IF EXISTS test2; CREATE DATABASE test2 ENGINE = Filesystem('/tmp'); """ 2>&1 | tr '\n' ' ' | grep -oF -e "UNKNOWN_TABLE" -e "BAD_ARGUMENTS" > /dev/null && echo "OK" || echo 'FAIL' ||: # BAD_ARGUMENTS: .../user_files/relative_unknown_dir does not exist -${CLICKHOUSE_CLIENT} --multiline --multiquery -q """ +${CLICKHOUSE_CLIENT} --multiline -q """ DROP DATABASE IF EXISTS test2; CREATE DATABASE test2 ENGINE = Filesystem('relative_unknown_dir'); """ 2>&1 | tr '\n' ' ' | grep -oF -e "UNKNOWN_TABLE" -e "BAD_ARGUMENTS" > /dev/null && echo "OK" || echo 'FAIL' ||: diff --git a/tests/queries/0_stateless/02724_database_s3.sh b/tests/queries/0_stateless/02724_database_s3.sh index 80b47282146..cc7f012c8cf 100755 --- a/tests/queries/0_stateless/02724_database_s3.sh +++ b/tests/queries/0_stateless/02724_database_s3.sh @@ -8,7 +8,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) ################# echo "Test 1: select from s3" -${CLICKHOUSE_CLIENT} --multiline --multiquery -q """ +${CLICKHOUSE_CLIENT} --multiline -q """ DROP DATABASE IF EXISTS test1; CREATE DATABASE test1 ENGINE = S3; USE test1; @@ -17,7 +17,7 @@ SELECT * FROM 
\"http://localhost:11111/test/a.tsv\" ${CLICKHOUSE_CLIENT} -q "SHOW DATABASES;" | grep test1 # check credentials with absolute path -${CLICKHOUSE_CLIENT} --multiline --multiquery -q """ +${CLICKHOUSE_CLIENT} --multiline -q """ DROP DATABASE IF EXISTS test2; CREATE DATABASE test2 ENGINE = S3('', 'test', 'testtest'); USE test2; @@ -25,7 +25,7 @@ SELECT * FROM \"http://localhost:11111/test/b.tsv\" """ # check credentials with relative path -${CLICKHOUSE_CLIENT} --multiline --multiquery -q """ +${CLICKHOUSE_CLIENT} --multiline -q """ DROP DATABASE IF EXISTS test4; CREATE DATABASE test4 ENGINE = S3('http://localhost:11111/test', 'test', 'testtest'); USE test4; @@ -33,7 +33,7 @@ SELECT * FROM \"b.tsv\" """ # Check named collection loading -${CLICKHOUSE_CLIENT} --multiline --multiquery -q """ +${CLICKHOUSE_CLIENT} --multiline -q """ DROP DATABASE IF EXISTS test5; CREATE DATABASE test5 ENGINE = S3(s3_conn_db); SELECT * FROM test5.\`b.tsv\` @@ -41,20 +41,20 @@ SELECT * FROM test5.\`b.tsv\` ################# echo "Test 2: check exceptions" -${CLICKHOUSE_CLIENT} --multiline --multiquery -q """ +${CLICKHOUSE_CLIENT} --multiline -q """ DROP DATABASE IF EXISTS test3; CREATE DATABASE test3 ENGINE = S3; USE test3; SELECT * FROM \"http://localhost:11111/test/a.myext\" """ 2>&1 | tr '\n' ' ' | grep -oF -e "UNKNOWN_TABLE" -e "S3_ERROR" > /dev/null && echo "OK" || echo 'FAIL' ||: -${CLICKHOUSE_CLIENT} --multiline --multiquery -q """ +${CLICKHOUSE_CLIENT} --multiline -q """ USE test3; SELECT * FROM \"abacaba\" """ 2>&1 | tr '\n' ' ' | grep -oF -e "UNKNOWN_TABLE" -e "BAD_ARGUMENTS" > /dev/null && echo "OK" || echo 'FAIL' ||: # Cleanup -${CLICKHOUSE_CLIENT} --multiline --multiquery -q """ +${CLICKHOUSE_CLIENT} --multiline -q """ DROP DATABASE IF EXISTS test1; DROP DATABASE IF EXISTS test2; DROP DATABASE IF EXISTS test3; diff --git a/tests/queries/0_stateless/02725_database_hdfs.sh b/tests/queries/0_stateless/02725_database_hdfs.sh index 1eb22976b84..7fd35c72ef1 100755 --- a/tests/queries/0_stateless/02725_database_hdfs.sh +++ b/tests/queries/0_stateless/02725_database_hdfs.sh @@ -25,7 +25,7 @@ fi echo "Test 1: select from hdfs database" # Database without specific host -${CLICKHOUSE_CLIENT} --multiline --multiquery -q """ +${CLICKHOUSE_CLIENT} --multiline -q """ DROP DATABASE IF EXISTS test_hdfs_1; CREATE DATABASE test_hdfs_1 ENGINE = HDFS; USE test_hdfs_1; @@ -34,7 +34,7 @@ SELECT * FROM \"hdfs://localhost:12222/test_02725_1.tsv\" ${CLICKHOUSE_CLIENT} -q "SHOW DATABASES;" | grep test_hdfs_1 # Database with host -${CLICKHOUSE_CLIENT} --multiline --multiquery -q """ +${CLICKHOUSE_CLIENT} --multiline -q """ DROP DATABASE IF EXISTS test_hdfs_2; CREATE DATABASE test_hdfs_2 ENGINE = HDFS('hdfs://localhost:12222'); USE test_hdfs_2; @@ -45,12 +45,12 @@ ${CLICKHOUSE_CLIENT} -q "SHOW DATABASES;" | grep test_hdfs_2 ################# echo "Test 2: check exceptions" -${CLICKHOUSE_CLIENT} --multiline --multiquery -q """ +${CLICKHOUSE_CLIENT} --multiline -q """ DROP DATABASE IF EXISTS test_hdfs_3; CREATE DATABASE test_hdfs_3 ENGINE = HDFS('abacaba'); """ 2>&1 | tr '\n' ' ' | grep -oF "BAD_ARGUMENTS" -${CLICKHOUSE_CLIENT} --multiline --multiquery -q """ +${CLICKHOUSE_CLIENT} --multiline -q """ DROP DATABASE IF EXISTS test_hdfs_4; CREATE DATABASE test_hdfs_4 ENGINE = HDFS; USE test_hdfs_4; @@ -64,7 +64,7 @@ ${CLICKHOUSE_CLIENT} --query "SELECT * FROM test_hdfs_4.\`hdfs://localhost:12222 # Cleanup -${CLICKHOUSE_CLIENT} --multiline --multiquery -q """ +${CLICKHOUSE_CLIENT} --multiline -q """ DROP DATABASE IF EXISTS test_hdfs_1; 
DROP DATABASE IF EXISTS test_hdfs_2; DROP DATABASE IF EXISTS test_hdfs_3; diff --git a/tests/queries/0_stateless/02725_local_query_parameters.sh b/tests/queries/0_stateless/02725_local_query_parameters.sh index 92d7f645454..151a854d5b9 100755 --- a/tests/queries/0_stateless/02725_local_query_parameters.sh +++ b/tests/queries/0_stateless/02725_local_query_parameters.sh @@ -5,6 +5,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -$CLICKHOUSE_LOCAL --multiquery -q " +$CLICKHOUSE_LOCAL -q " SET param_x=1; SELECT {x:UInt64}, {x:String};" diff --git a/tests/queries/0_stateless/02751_multiquery_with_argument.reference b/tests/queries/0_stateless/02751_multiquery_with_argument.reference index 2e55712e49c..f02e9bab2cd 100644 --- a/tests/queries/0_stateless/02751_multiquery_with_argument.reference +++ b/tests/queries/0_stateless/02751_multiquery_with_argument.reference @@ -5,8 +5,6 @@ Syntax error Empty query Empty query -BAD_ARGUMENTS -BAD_ARGUMENTS 301 302 304 diff --git a/tests/queries/0_stateless/02751_multiquery_with_argument.sh b/tests/queries/0_stateless/02751_multiquery_with_argument.sh index 7b959a3c3dc..4021194656b 100755 --- a/tests/queries/0_stateless/02751_multiquery_with_argument.sh +++ b/tests/queries/0_stateless/02751_multiquery_with_argument.sh @@ -4,18 +4,14 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -$CLICKHOUSE_LOCAL --multiquery "SELECT 100" -$CLICKHOUSE_LOCAL --multiquery "SELECT 101;" -$CLICKHOUSE_LOCAL --multiquery "SELECT 102;SELECT 103;" +$CLICKHOUSE_LOCAL "SELECT 100" +$CLICKHOUSE_LOCAL "SELECT 101;" +$CLICKHOUSE_LOCAL "SELECT 102;SELECT 103;" # Invalid SQL. -$CLICKHOUSE_LOCAL --multiquery "SELECT 200; S" 2>&1 | grep -o 'Syntax error' -$CLICKHOUSE_LOCAL --multiquery "; SELECT 201;" 2>&1 | grep -o 'Empty query' -$CLICKHOUSE_LOCAL --multiquery "; S; SELECT 202" 2>&1 | grep -o 'Empty query' - -# Simultaneously passing --queries-file + --query (multiquery) is prohibited. -$CLICKHOUSE_LOCAL --queries-file "queries.csv" --multiquery "SELECT 250;" 2>&1 | grep -o 'BAD_ARGUMENTS' -$CLICKHOUSE_CLIENT --queries-file "queries.csv" --multiquery "SELECT 251;" 2>&1 | grep -o 'BAD_ARGUMENTS' +$CLICKHOUSE_LOCAL "SELECT 200; S" 2>&1 | grep -o 'Syntax error' +$CLICKHOUSE_LOCAL "; SELECT 201;" 2>&1 | grep -o 'Empty query' +$CLICKHOUSE_LOCAL "; S; SELECT 202" 2>&1 | grep -o 'Empty query' # Error expectation cases. 
# -n is also interpreted as a query diff --git a/tests/queries/0_stateless/02815_no_throw_in_simple_queries.sh b/tests/queries/0_stateless/02815_no_throw_in_simple_queries.sh index 68c55f9b66a..18ffc9dfec3 100755 --- a/tests/queries/0_stateless/02815_no_throw_in_simple_queries.sh +++ b/tests/queries/0_stateless/02815_no_throw_in_simple_queries.sh @@ -19,7 +19,7 @@ $CLICKHOUSE_CLIENT --query "SHOW TABLES" || echo "Failed" $CLICKHOUSE_CLIENT --query "SELECT * FROM system.tables WHERE database = currentDatabase() FORMAT Null" || echo "Failed" # Multi queries are ok: -$CLICKHOUSE_LOCAL --multiquery "SELECT 1; SELECT 2;" || echo "Failed" +$CLICKHOUSE_LOCAL "SELECT 1; SELECT 2;" || echo "Failed" # It can run in interactive mode: function run() diff --git a/tests/queries/0_stateless/02843_insertion_table_schema_infer.sh b/tests/queries/0_stateless/02843_insertion_table_schema_infer.sh index d806b678456..9207e48092f 100755 --- a/tests/queries/0_stateless/02843_insertion_table_schema_infer.sh +++ b/tests/queries/0_stateless/02843_insertion_table_schema_infer.sh @@ -9,7 +9,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) DATA_DIR=$CUR_DIR/data_tsv -$CLICKHOUSE_LOCAL --multiquery \ +$CLICKHOUSE_LOCAL \ "CREATE VIEW users AS SELECT * FROM file('$DATA_DIR/mock_data.tsv', TSVWithNamesAndTypes); CREATE TABLE users_output (name String, tag UInt64)ENGINE = Memory; INSERT INTO users_output WITH (SELECT groupUniqArrayArray(mapKeys(Tags)) FROM users) AS unique_tags SELECT UserName AS name, length(unique_tags) AS tag FROM users; diff --git a/tests/queries/0_stateless/02864_restore_table_with_broken_part.sh b/tests/queries/0_stateless/02864_restore_table_with_broken_part.sh index 08313e2fd3b..229f832ba14 100755 --- a/tests/queries/0_stateless/02864_restore_table_with_broken_part.sh +++ b/tests/queries/0_stateless/02864_restore_table_with_broken_part.sh @@ -39,7 +39,7 @@ $CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS tbl" # Then try to restore with the setting `restore_broken_parts_as_detached` set to true. 
$CLICKHOUSE_CLIENT --query "RESTORE TABLE default.tbl AS tbl FROM Disk('backups', '${backup_name}') SETTINGS restore_broken_parts_as_detached = true" 2>/dev/null | awk -F '\t' '{print $2}' -$CLICKHOUSE_CLIENT --multiquery <&1 | grep -c "INVOKER") >= 1 )) && echo "OK" || echo "UNEXPECTED" (( $(${CLICKHOUSE_CLIENT} --query "SHOW TABLE $db.test_view_2" 2>&1 | grep -c "DEFINER = $user1") >= 1 )) && echo "OK" || echo "UNEXPECTED" -${CLICKHOUSE_CLIENT} --multiquery <&1 | grep -c "Not enough privileges") >= 1 )) && echo "OK" || echo "UNEXPECTED" (( $(${CLICKHOUSE_CLIENT} --query "INSERT INTO $db.test_table VALUES ('foo'), ('bar');" 2>&1 | grep -c "Not enough privileges") >= 1 )) && echo "OK" || echo "UNEXPECTED" -${CLICKHOUSE_CLIENT} --multiquery <\n" $CLICKHOUSE_CLIENT --query "SELECT * FROM file(${UNIX_ENDINGS}, 'TabSeparated', 'SearchTerm String, Date Date, Hits UInt32');" -$CLICKHOUSE_CLIENT --multiquery --query "SELECT * FROM file(${DOS_ENDINGS}, 'TabSeparated', 'SearchTerm String, Date Date, Hits UInt32'); --{serverError 117}" +$CLICKHOUSE_CLIENT --query "SELECT * FROM file(${DOS_ENDINGS}, 'TabSeparated', 'SearchTerm String, Date Date, Hits UInt32'); --{serverError 117}" echo -e "\n<-- Read DOS endings with setting input_format_tsv_crlf_end_of_line=1 -->\n" $CLICKHOUSE_CLIENT --query "SELECT * FROM file(${DOS_ENDINGS}, 'TabSeparated', 'SearchTerm String, Date Date, Hits UInt32') SETTINGS input_format_tsv_crlf_end_of_line = 1;" diff --git a/tests/queries/0_stateless/02995_forget_partition.sh b/tests/queries/0_stateless/02995_forget_partition.sh index c22f5829130..6fa0b96e90d 100755 --- a/tests/queries/0_stateless/02995_forget_partition.sh +++ b/tests/queries/0_stateless/02995_forget_partition.sh @@ -6,7 +6,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . "$CUR_DIR"/../shell_config.sh -${CLICKHOUSE_CLIENT} --multiline --multiquery -q """ +${CLICKHOUSE_CLIENT} --multiline -q """ drop table if exists forget_partition; create table forget_partition @@ -31,7 +31,7 @@ alter table forget_partition drop partition '20240102'; # DROP PARTITION do not wait for a part to be removed from memory due to possible concurrent SELECTs, so we have to do wait manually here while [[ $(${CLICKHOUSE_CLIENT} -q "select count() from system.parts where database=currentDatabase() and table='forget_partition' and partition IN ('20240101', '20240102')") != 0 ]]; do sleep 1; done -${CLICKHOUSE_CLIENT} --multiline --multiquery -q """ +${CLICKHOUSE_CLIENT} --multiline -q """ set allow_unrestricted_reads_from_keeper=1; select '---before---'; diff --git a/tests/queries/0_stateless/02995_index_1.sh b/tests/queries/0_stateless/02995_index_1.sh index a5f1b30c2e8..128697fd0fe 100755 --- a/tests/queries/0_stateless/02995_index_1.sh +++ b/tests/queries/0_stateless/02995_index_1.sh @@ -5,7 +5,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh -${CLICKHOUSE_CLIENT} --multiquery " +${CLICKHOUSE_CLIENT} " DROP TABLE IF EXISTS test; CREATE TABLE test (a String, b String, c String) ENGINE = MergeTree ORDER BY (a, b, c) SETTINGS index_granularity = 11; @@ -39,6 +39,6 @@ WHERE a >= (round(pow(sipHash64(1, try), 1 / (3 + sipHash64(2, try) % 8))) AS a1 AND c <= (c1 + round(pow(sipHash64(11, try), 1 / (3 + sipHash64(12, try) % 8))))::String HAVING count() > 0; " -done | ${CLICKHOUSE_CLIENT} --multiquery +done | ${CLICKHOUSE_CLIENT} -${CLICKHOUSE_CLIENT} --multiquery "DROP TABLE test" +${CLICKHOUSE_CLIENT} "DROP TABLE test" diff --git a/tests/queries/0_stateless/02995_index_10.sh b/tests/queries/0_stateless/02995_index_10.sh index d72c7c72705..c15ba00fd05 100755 --- a/tests/queries/0_stateless/02995_index_10.sh +++ b/tests/queries/0_stateless/02995_index_10.sh @@ -5,7 +5,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -${CLICKHOUSE_CLIENT} --multiquery " +${CLICKHOUSE_CLIENT} " DROP TABLE IF EXISTS test; CREATE TABLE test (a String, b String, c String) ENGINE = MergeTree ORDER BY (a, b, c) SETTINGS index_granularity = 11; @@ -39,6 +39,6 @@ WHERE a >= (round(pow(sipHash64(1, try), 1 / (3 + sipHash64(2, try) % 8))) AS a1 AND c <= (c1 + round(pow(sipHash64(11, try), 1 / (3 + sipHash64(12, try) % 8))))::String HAVING count() > 0; " -done | ${CLICKHOUSE_CLIENT} --multiquery +done | ${CLICKHOUSE_CLIENT} -${CLICKHOUSE_CLIENT} --multiquery "DROP TABLE test" +${CLICKHOUSE_CLIENT} "DROP TABLE test" diff --git a/tests/queries/0_stateless/02995_index_2.sh b/tests/queries/0_stateless/02995_index_2.sh index e7451c7ee4b..a32f5c511f8 100755 --- a/tests/queries/0_stateless/02995_index_2.sh +++ b/tests/queries/0_stateless/02995_index_2.sh @@ -5,7 +5,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -${CLICKHOUSE_CLIENT} --multiquery " +${CLICKHOUSE_CLIENT} " DROP TABLE IF EXISTS test; CREATE TABLE test (a String, b String, c String) ENGINE = MergeTree ORDER BY (a, b, c) SETTINGS index_granularity = 11; @@ -39,6 +39,6 @@ WHERE a >= (round(pow(sipHash64(1, try), 1 / (3 + sipHash64(2, try) % 8))) AS a1 AND c <= (c1 + round(pow(sipHash64(11, try), 1 / (3 + sipHash64(12, try) % 8))))::String HAVING count() > 0; " -done | ${CLICKHOUSE_CLIENT} --multiquery +done | ${CLICKHOUSE_CLIENT} -${CLICKHOUSE_CLIENT} --multiquery "DROP TABLE test" +${CLICKHOUSE_CLIENT} "DROP TABLE test" diff --git a/tests/queries/0_stateless/02995_index_3.sh b/tests/queries/0_stateless/02995_index_3.sh index 506429e2696..9cc937391fc 100755 --- a/tests/queries/0_stateless/02995_index_3.sh +++ b/tests/queries/0_stateless/02995_index_3.sh @@ -5,7 +5,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh -${CLICKHOUSE_CLIENT} --multiquery " +${CLICKHOUSE_CLIENT} " DROP TABLE IF EXISTS test; CREATE TABLE test (a String, b String, c String) ENGINE = MergeTree ORDER BY (a, b, c) SETTINGS index_granularity = 11; @@ -39,6 +39,6 @@ WHERE a >= (round(pow(sipHash64(1, try), 1 / (3 + sipHash64(2, try) % 8))) AS a1 AND c <= (c1 + round(pow(sipHash64(11, try), 1 / (3 + sipHash64(12, try) % 8))))::String HAVING count() > 0; " -done | ${CLICKHOUSE_CLIENT} --multiquery +done | ${CLICKHOUSE_CLIENT} -${CLICKHOUSE_CLIENT} --multiquery "DROP TABLE test" +${CLICKHOUSE_CLIENT} "DROP TABLE test" diff --git a/tests/queries/0_stateless/02995_index_4.sh b/tests/queries/0_stateless/02995_index_4.sh index 1a0458728f9..e450997e48b 100755 --- a/tests/queries/0_stateless/02995_index_4.sh +++ b/tests/queries/0_stateless/02995_index_4.sh @@ -5,7 +5,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -${CLICKHOUSE_CLIENT} --multiquery " +${CLICKHOUSE_CLIENT} " DROP TABLE IF EXISTS test; CREATE TABLE test (a String, b String, c String) ENGINE = MergeTree ORDER BY (a, b, c) SETTINGS index_granularity = 11; @@ -39,6 +39,6 @@ WHERE a >= (round(pow(sipHash64(1, try), 1 / (3 + sipHash64(2, try) % 8))) AS a1 AND c <= (c1 + round(pow(sipHash64(11, try), 1 / (3 + sipHash64(12, try) % 8))))::String HAVING count() > 0; " -done | ${CLICKHOUSE_CLIENT} --multiquery +done | ${CLICKHOUSE_CLIENT} -${CLICKHOUSE_CLIENT} --multiquery "DROP TABLE test" +${CLICKHOUSE_CLIENT} "DROP TABLE test" diff --git a/tests/queries/0_stateless/02995_index_5.sh b/tests/queries/0_stateless/02995_index_5.sh index 60c12a8146d..80f75a532e3 100755 --- a/tests/queries/0_stateless/02995_index_5.sh +++ b/tests/queries/0_stateless/02995_index_5.sh @@ -5,7 +5,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -${CLICKHOUSE_CLIENT} --multiquery " +${CLICKHOUSE_CLIENT} " DROP TABLE IF EXISTS test; CREATE TABLE test (a String, b String, c String) ENGINE = MergeTree ORDER BY (a, b, c) SETTINGS index_granularity = 11; @@ -39,6 +39,6 @@ WHERE a >= (round(pow(sipHash64(1, try), 1 / (3 + sipHash64(2, try) % 8))) AS a1 AND c <= (c1 + round(pow(sipHash64(11, try), 1 / (3 + sipHash64(12, try) % 8))))::String HAVING count() > 0; " -done | ${CLICKHOUSE_CLIENT} --multiquery +done | ${CLICKHOUSE_CLIENT} -${CLICKHOUSE_CLIENT} --multiquery "DROP TABLE test" +${CLICKHOUSE_CLIENT} "DROP TABLE test" diff --git a/tests/queries/0_stateless/02995_index_6.sh b/tests/queries/0_stateless/02995_index_6.sh index 4936f73f36b..e90387c7c0c 100755 --- a/tests/queries/0_stateless/02995_index_6.sh +++ b/tests/queries/0_stateless/02995_index_6.sh @@ -5,7 +5,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh -${CLICKHOUSE_CLIENT} --multiquery " +${CLICKHOUSE_CLIENT} " DROP TABLE IF EXISTS test; CREATE TABLE test (a String, b String, c String) ENGINE = MergeTree ORDER BY (a, b, c) SETTINGS index_granularity = 11; @@ -39,6 +39,6 @@ WHERE a >= (round(pow(sipHash64(1, try), 1 / (3 + sipHash64(2, try) % 8))) AS a1 AND c <= (c1 + round(pow(sipHash64(11, try), 1 / (3 + sipHash64(12, try) % 8))))::String HAVING count() > 0; " -done | ${CLICKHOUSE_CLIENT} --multiquery +done | ${CLICKHOUSE_CLIENT} -${CLICKHOUSE_CLIENT} --multiquery "DROP TABLE test" +${CLICKHOUSE_CLIENT} "DROP TABLE test" diff --git a/tests/queries/0_stateless/02995_index_7.sh b/tests/queries/0_stateless/02995_index_7.sh index 26be310abce..a5fdd98b2f8 100755 --- a/tests/queries/0_stateless/02995_index_7.sh +++ b/tests/queries/0_stateless/02995_index_7.sh @@ -5,7 +5,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -${CLICKHOUSE_CLIENT} --multiquery " +${CLICKHOUSE_CLIENT} " DROP TABLE IF EXISTS test; CREATE TABLE test (a String, b String, c String) ENGINE = MergeTree ORDER BY (a, b, c) SETTINGS index_granularity = 11; @@ -39,6 +39,6 @@ WHERE a >= (round(pow(sipHash64(1, try), 1 / (3 + sipHash64(2, try) % 8))) AS a1 AND c <= (c1 + round(pow(sipHash64(11, try), 1 / (3 + sipHash64(12, try) % 8))))::String HAVING count() > 0; " -done | ${CLICKHOUSE_CLIENT} --multiquery +done | ${CLICKHOUSE_CLIENT} -${CLICKHOUSE_CLIENT} --multiquery "DROP TABLE test" +${CLICKHOUSE_CLIENT} "DROP TABLE test" diff --git a/tests/queries/0_stateless/02995_index_8.sh b/tests/queries/0_stateless/02995_index_8.sh index 8c2620b59fd..adb835aedca 100755 --- a/tests/queries/0_stateless/02995_index_8.sh +++ b/tests/queries/0_stateless/02995_index_8.sh @@ -5,7 +5,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -${CLICKHOUSE_CLIENT} --multiquery " +${CLICKHOUSE_CLIENT} " DROP TABLE IF EXISTS test; CREATE TABLE test (a String, b String, c String) ENGINE = MergeTree ORDER BY (a, b, c) SETTINGS index_granularity = 11; @@ -39,6 +39,6 @@ WHERE a >= (round(pow(sipHash64(1, try), 1 / (3 + sipHash64(2, try) % 8))) AS a1 AND c <= (c1 + round(pow(sipHash64(11, try), 1 / (3 + sipHash64(12, try) % 8))))::String HAVING count() > 0; " -done | ${CLICKHOUSE_CLIENT} --multiquery +done | ${CLICKHOUSE_CLIENT} -${CLICKHOUSE_CLIENT} --multiquery "DROP TABLE test" +${CLICKHOUSE_CLIENT} "DROP TABLE test" diff --git a/tests/queries/0_stateless/02995_index_9.sh b/tests/queries/0_stateless/02995_index_9.sh index 76160c62aaa..4b78777cd2a 100755 --- a/tests/queries/0_stateless/02995_index_9.sh +++ b/tests/queries/0_stateless/02995_index_9.sh @@ -5,7 +5,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh -${CLICKHOUSE_CLIENT} --multiquery " +${CLICKHOUSE_CLIENT} " DROP TABLE IF EXISTS test; CREATE TABLE test (a String, b String, c String) ENGINE = MergeTree ORDER BY (a, b, c) SETTINGS index_granularity = 11; @@ -39,6 +39,6 @@ WHERE a >= (round(pow(sipHash64(1, try), 1 / (3 + sipHash64(2, try) % 8))) AS a1 AND c <= (c1 + round(pow(sipHash64(11, try), 1 / (3 + sipHash64(12, try) % 8))))::String HAVING count() > 0; " -done | ${CLICKHOUSE_CLIENT} --multiquery +done | ${CLICKHOUSE_CLIENT} -${CLICKHOUSE_CLIENT} --multiquery "DROP TABLE test" +${CLICKHOUSE_CLIENT} "DROP TABLE test" diff --git a/tests/queries/0_stateless/02998_native_parquet_reader.sh b/tests/queries/0_stateless/02998_native_parquet_reader.sh index d6369c4921b..95b659815ed 100755 --- a/tests/queries/0_stateless/02998_native_parquet_reader.sh +++ b/tests/queries/0_stateless/02998_native_parquet_reader.sh @@ -208,4 +208,4 @@ CH_SCHEMA="\ QUERY="SELECT * from file('$PAR_PATH', 'Parquet', '$CH_SCHEMA')" # there may be more than on group in parquet files, unstable results may generated by multithreads -$CLICKHOUSE_LOCAL --multiquery --max_threads 1 --max_parsing_threads 1 --input_format_parquet_use_native_reader true --query "$QUERY" +$CLICKHOUSE_LOCAL --max_threads 1 --max_parsing_threads 1 --input_format_parquet_use_native_reader true --query "$QUERY" diff --git a/tests/queries/0_stateless/03001_backup_matview_after_modify_query.sh b/tests/queries/0_stateless/03001_backup_matview_after_modify_query.sh index f857358a5ea..8c6aa70f14c 100755 --- a/tests/queries/0_stateless/03001_backup_matview_after_modify_query.sh +++ b/tests/queries/0_stateless/03001_backup_matview_after_modify_query.sh @@ -9,7 +9,7 @@ db="$CLICKHOUSE_DATABASE" db_2="${db}_2" backup_name="${db}_backup" -${CLICKHOUSE_CLIENT} --multiquery " +${CLICKHOUSE_CLIENT} " DROP TABLE IF EXISTS src; DROP TABLE IF EXISTS mv; CREATE TABLE src(Timestamp DateTime64(9), c1 String, c2 String) ENGINE=MergeTree ORDER BY Timestamp; diff --git a/tests/queries/0_stateless/03001_matview_columns_after_modify_query.sh b/tests/queries/0_stateless/03001_matview_columns_after_modify_query.sh index 2ec5832fac6..96cbd391a44 100755 --- a/tests/queries/0_stateless/03001_matview_columns_after_modify_query.sh +++ b/tests/queries/0_stateless/03001_matview_columns_after_modify_query.sh @@ -4,7 +4,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh -${CLICKHOUSE_CLIENT} --multiquery " +${CLICKHOUSE_CLIENT} " DROP TABLE IF EXISTS src; DROP TABLE IF EXISTS mv; CREATE TABLE src(Timestamp DateTime64(9), c1 String, c2 String) ENGINE=MergeTree ORDER BY Timestamp; diff --git a/tests/queries/0_stateless/03006_correct_revoke_for_partial_rights.sh b/tests/queries/0_stateless/03006_correct_revoke_for_partial_rights.sh index 8c79dfdbafc..312fb03668c 100755 --- a/tests/queries/0_stateless/03006_correct_revoke_for_partial_rights.sh +++ b/tests/queries/0_stateless/03006_correct_revoke_for_partial_rights.sh @@ -8,7 +8,7 @@ db=${CLICKHOUSE_DATABASE} user1="user1_03006_${db}_$RANDOM" user2="user2_03006_${db}_$RANDOM" -${CLICKHOUSE_CLIENT} --multiquery <&1 | grep --text -F -v "ASan doesn't fully support makecontext/swapcontext functions" -${CLICKHOUSE_CLIENT} --multiquery " +${CLICKHOUSE_CLIENT} " DROP TABLE test; " diff --git a/tests/queries/0_stateless/03212_thousand_exceptions.sh b/tests/queries/0_stateless/03212_thousand_exceptions.sh index 0a6abf35c10..1237cbf537f 100755 --- a/tests/queries/0_stateless/03212_thousand_exceptions.sh +++ b/tests/queries/0_stateless/03212_thousand_exceptions.sh @@ -6,4 +6,4 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . "$CURDIR"/../shell_config.sh # This should not be too slow, even under sanitizers. -yes "SELECT throwIf(1); SELECT '.' FORMAT Values;" | head -n 1000 | $CLICKHOUSE_CLIENT --multiquery --ignore-error 2>/dev/null +yes "SELECT throwIf(1); SELECT '.' FORMAT Values;" | head -n 1000 | $CLICKHOUSE_CLIENT --ignore-error 2>/dev/null From 6372fdee6d344bd87d58ce89fa069b55750c9aba Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 30 Jul 2024 13:46:05 +0200 Subject: [PATCH 461/661] Update tests --- tests/queries/0_stateless/02995_index_3.sh | 2 +- tests/queries/0_stateless/03143_asof_join_ddb_long.sql | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/02995_index_3.sh b/tests/queries/0_stateless/02995_index_3.sh index 506429e2696..219ae81154f 100755 --- a/tests/queries/0_stateless/02995_index_3.sh +++ b/tests/queries/0_stateless/02995_index_3.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, no-debug, no-asan, no-tsan, no-msan, no-ubsan, no-sanitize-coverage +# Tags: long, no-debug, no-asan, no-tsan, no-msan, no-ubsan, no-sanitize-coverage, no-distributed-cache CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/03143_asof_join_ddb_long.sql b/tests/queries/0_stateless/03143_asof_join_ddb_long.sql index 17a67511030..a927e4f1e1f 100644 --- a/tests/queries/0_stateless/03143_asof_join_ddb_long.sql +++ b/tests/queries/0_stateless/03143_asof_join_ddb_long.sql @@ -1,4 +1,4 @@ --- Tags: long +-- Tags: long, no-distributed-cache DROP TABLE IF EXISTS build; DROP TABLE IF EXISTS skewed_probe; From eb129b539fce2a407182d892ce3bd00f782a5833 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Tue, 30 Jul 2024 13:46:27 +0200 Subject: [PATCH 462/661] Add tests --- src/Storages/StorageKeeperMap.cpp | 135 +++++++++++++----- src/Storages/StorageKeeperMap.h | 3 +- .../test_keeper_map_retries/__init__.py | 0 .../configs/enable_keeper_map.xml | 3 + .../configs/fault_injection.xml | 6 + .../test_keeper_map_retries/test.py | 78 ++++++++++ .../02911_backup_restore_keeper_map.sh | 15 +- 7 files changed, 194 insertions(+), 46 deletions(-) create mode 100644 tests/integration/test_keeper_map_retries/__init__.py create mode 100644 
tests/integration/test_keeper_map_retries/configs/enable_keeper_map.xml create mode 100644 tests/integration/test_keeper_map_retries/configs/fault_injection.xml create mode 100644 tests/integration/test_keeper_map_retries/test.py diff --git a/src/Storages/StorageKeeperMap.cpp b/src/Storages/StorageKeeperMap.cpp index 5534bb7f346..09c21ae28f5 100644 --- a/src/Storages/StorageKeeperMap.cpp +++ b/src/Storages/StorageKeeperMap.cpp @@ -183,7 +183,7 @@ public: settings.insert_keeper_retry_max_backoff_ms}, context->getProcessListElement()}; - retries_ctl.retryLoop([&]() + zk_retry.retryLoop([&]() { auto zookeeper = storage.getClient(); auto keys_limit = storage.keysLimit(); @@ -205,12 +205,12 @@ public: for (const auto & [key, _] : new_values) key_paths.push_back(storage.fullPathForKey(key)); - zkutil::ZooKeeper::MultiExistsResponse results; + zkutil::ZooKeeper::MultiTryGetResponse results; if constexpr (!for_update) { if (!strict) - results = zookeeper->exists(key_paths); + results = zookeeper->tryGet(key_paths); } Coordination::Requests requests; @@ -231,7 +231,8 @@ public: { if (!strict && results[i].error == Coordination::Error::ZOK) { - requests.push_back(zkutil::makeSetRequest(key_paths[i], new_values[key], -1)); + if (results[i].data != new_values[key]) + requests.push_back(zkutil::makeSetRequest(key_paths[i], new_values[key], -1)); } else { @@ -241,6 +242,9 @@ public: } } + if (requests.empty()) + return; + if (new_keys_num != 0) { auto will_be = current_keys_num + new_keys_num; @@ -259,7 +263,7 @@ public: }; template -class StorageKeeperMapSource : public ISource +class StorageKeeperMapSource : public ISource, WithContext { const StorageKeeperMap & storage; size_t max_block_size; @@ -290,8 +294,15 @@ public: KeyContainerPtr container_, KeyContainerIter begin_, KeyContainerIter end_, - bool with_version_column_) - : ISource(getHeader(header, with_version_column_)), storage(storage_), max_block_size(max_block_size_), container(std::move(container_)), it(begin_), end(end_) + bool with_version_column_, + ContextPtr context_) + : ISource(getHeader(header, with_version_column_)) + , WithContext(std::move(context_)) + , storage(storage_) + , max_block_size(max_block_size_) + , container(std::move(container_)) + , it(begin_) + , end(end_) , with_version_column(with_version_column_) { } @@ -316,12 +327,12 @@ public: for (auto & raw_key : raw_keys) raw_key = base64Encode(raw_key, /* url_encoding */ true); - return storage.getBySerializedKeys(raw_keys, nullptr, with_version_column); + return storage.getBySerializedKeys(raw_keys, nullptr, with_version_column, getContext()); } else { size_t elem_num = std::min(max_block_size, static_cast(end - it)); - auto chunk = storage.getBySerializedKeys(std::span{it, it + elem_num}, nullptr, with_version_column); + auto chunk = storage.getBySerializedKeys(std::span{it, it + elem_num}, nullptr, with_version_column, getContext()); it += elem_num; return chunk; } @@ -553,14 +564,31 @@ Pipe StorageKeeperMap::read( using KeyContainer = typename KeyContainerPtr::element_type; pipes.emplace_back(std::make_shared>( - *this, sample_block, max_block_size, keys, keys->begin() + begin, keys->begin() + end, with_version_column)); + *this, sample_block, max_block_size, keys, keys->begin() + begin, keys->begin() + end, with_version_column, context_)); } return Pipe::unitePipes(std::move(pipes)); }; - auto client = getClient(); if (all_scan) - return process_keys(std::make_shared>(client->getChildren(zk_data_path))); + { + const auto & settings = context_->getSettingsRef(); + 
ZooKeeperRetriesControl zk_retry{ + getName(), + getLogger(getName()), + ZooKeeperRetriesInfo{ + settings.keeper_max_retries, + settings.keeper_retry_initial_backoff_ms, + settings.keeper_retry_max_backoff_ms}, + context_->getProcessListElement()}; + + std::vector children; + zk_retry.retryLoop([&] + { + auto client = getClient(); + children = client->getChildren(zk_data_path); + }); + return process_keys(std::make_shared>(std::move(children))); + } return process_keys(std::move(filtered_keys)); } @@ -571,11 +599,24 @@ SinkToStoragePtr StorageKeeperMap::write(const ASTPtr & /*query*/, const Storage return std::make_shared(*this, metadata_snapshot->getSampleBlock(), local_context); } -void StorageKeeperMap::truncate(const ASTPtr &, const StorageMetadataPtr &, ContextPtr, TableExclusiveLockHolder &) +void StorageKeeperMap::truncate(const ASTPtr &, const StorageMetadataPtr &, ContextPtr local_context, TableExclusiveLockHolder &) { checkTable(); - auto client = getClient(); - client->tryRemoveChildrenRecursive(zk_data_path, true); + const auto & settings = local_context->getSettingsRef(); + ZooKeeperRetriesControl zk_retry{ + getName(), + getLogger(getName()), + ZooKeeperRetriesInfo{ + settings.keeper_max_retries, + settings.keeper_retry_initial_backoff_ms, + settings.keeper_retry_max_backoff_ms}, + local_context->getProcessListElement()}; + + zk_retry.retryLoop([&] + { + auto client = getClient(); + client->tryRemoveChildrenRecursive(zk_data_path, true); + }); } bool StorageKeeperMap::dropTable(zkutil::ZooKeeperPtr zookeeper, const zkutil::EphemeralNodeHolder::Ptr & metadata_drop_lock) @@ -1064,10 +1105,11 @@ Chunk StorageKeeperMap::getByKeys(const ColumnsWithTypeAndName & keys, PaddedPOD if (raw_keys.size() != keys[0].column->size()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Assertion failed: {} != {}", raw_keys.size(), keys[0].column->size()); - return getBySerializedKeys(raw_keys, &null_map, /* version_column */ false); + return getBySerializedKeys(raw_keys, &null_map, /* version_column */ false, getContext()); } -Chunk StorageKeeperMap::getBySerializedKeys(const std::span keys, PaddedPODArray * null_map, bool with_version) const +Chunk StorageKeeperMap::getBySerializedKeys( + const std::span keys, PaddedPODArray * null_map, bool with_version, const ContextPtr & local_context) const { Block sample_block = getInMemoryMetadataPtr()->getSampleBlock(); MutableColumns columns = sample_block.cloneEmptyColumns(); @@ -1084,17 +1126,27 @@ Chunk StorageKeeperMap::getBySerializedKeys(const std::span k null_map->resize_fill(keys.size(), 1); } - auto client = getClient(); - Strings full_key_paths; full_key_paths.reserve(keys.size()); for (const auto & key : keys) - { full_key_paths.emplace_back(fullPathForKey(key)); - } - auto values = client->tryGet(full_key_paths); + const auto & settings = local_context->getSettingsRef(); + ZooKeeperRetriesControl zk_retry{ + getName(), + getLogger(getName()), + ZooKeeperRetriesInfo{ + settings.keeper_max_retries, + settings.keeper_retry_initial_backoff_ms, + settings.keeper_retry_max_backoff_ms}, + local_context->getProcessListElement()}; + + zkutil::ZooKeeper::MultiTryGetResponse values; + zk_retry.retryLoop([&]{ + auto client = getClient(); + values = client->tryGet(full_key_paths); + }); for (size_t i = 0; i < keys.size(); ++i) { @@ -1182,16 +1234,16 @@ void StorageKeeperMap::mutate(const MutationCommands & commands, ContextPtr loca if (commands.front().type == MutationCommand::Type::DELETE) { - MutationsInterpreter::Settings settings(true); - 
settings.return_all_columns = true; - settings.return_mutated_rows = true; + MutationsInterpreter::Settings mutation_settings(true); + mutation_settings.return_all_columns = true; + mutation_settings.return_mutated_rows = true; auto interpreter = std::make_unique( storage_ptr, metadata_snapshot, commands, local_context, - settings); + mutation_settings); auto pipeline = QueryPipelineBuilder::getPipeline(interpreter->execute()); PullingPipelineExecutor executor(pipeline); @@ -1200,8 +1252,6 @@ void StorageKeeperMap::mutate(const MutationCommands & commands, ContextPtr loca auto primary_key_pos = header.getPositionByName(primary_key); auto version_position = header.getPositionByName(std::string{version_column_name}); - auto client = getClient(); - Block block; while (executor.pull(block)) { @@ -1229,7 +1279,23 @@ void StorageKeeperMap::mutate(const MutationCommands & commands, ContextPtr loca } Coordination::Responses responses; - auto status = client->tryMulti(delete_requests, responses, /* check_session_valid */ true); + + const auto & settings = local_context->getSettingsRef(); + ZooKeeperRetriesControl zk_retry{ + getName(), + getLogger(getName()), + ZooKeeperRetriesInfo{ + settings.keeper_max_retries, + settings.keeper_retry_initial_backoff_ms, + settings.keeper_retry_max_backoff_ms}, + local_context->getProcessListElement()}; + + Coordination::Error status; + zk_retry.retryLoop([&] + { + auto client = getClient(); + status = client->tryMulti(delete_requests, responses, /* check_session_valid */ true); + }); if (status == Coordination::Error::ZOK) return; @@ -1241,9 +1307,14 @@ void StorageKeeperMap::mutate(const MutationCommands & commands, ContextPtr loca for (const auto & delete_request : delete_requests) { - auto code = client->tryRemove(delete_request->getPath()); - if (code != Coordination::Error::ZOK && code != Coordination::Error::ZNONODE) - throw zkutil::KeeperException::fromPath(code, delete_request->getPath()); + zk_retry.retryLoop([&] + { + auto client = getClient(); + status = client->tryRemove(delete_request->getPath()); + }); + + if (status != Coordination::Error::ZOK && status != Coordination::Error::ZNONODE) + throw zkutil::KeeperException::fromPath(status, delete_request->getPath()); } } diff --git a/src/Storages/StorageKeeperMap.h b/src/Storages/StorageKeeperMap.h index d4556792c48..cfbb35ab2fe 100644 --- a/src/Storages/StorageKeeperMap.h +++ b/src/Storages/StorageKeeperMap.h @@ -54,7 +54,8 @@ public: Names getPrimaryKey() const override { return {primary_key}; } Chunk getByKeys(const ColumnsWithTypeAndName & keys, PaddedPODArray & null_map, const Names &) const override; - Chunk getBySerializedKeys(std::span keys, PaddedPODArray * null_map, bool with_version) const; + Chunk getBySerializedKeys( + std::span keys, PaddedPODArray * null_map, bool with_version, const ContextPtr & local_context) const; Block getSampleBlock(const Names &) const override; diff --git a/tests/integration/test_keeper_map_retries/__init__.py b/tests/integration/test_keeper_map_retries/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_keeper_map_retries/configs/enable_keeper_map.xml b/tests/integration/test_keeper_map_retries/configs/enable_keeper_map.xml new file mode 100644 index 00000000000..b4cbb6a954b --- /dev/null +++ b/tests/integration/test_keeper_map_retries/configs/enable_keeper_map.xml @@ -0,0 +1,3 @@ + + /test_keeper_map + diff --git a/tests/integration/test_keeper_map_retries/configs/fault_injection.xml 
b/tests/integration/test_keeper_map_retries/configs/fault_injection.xml new file mode 100644 index 00000000000..145945c7c7c --- /dev/null +++ b/tests/integration/test_keeper_map_retries/configs/fault_injection.xml @@ -0,0 +1,6 @@ + + + 0.05 + 0.05 + + diff --git a/tests/integration/test_keeper_map_retries/test.py b/tests/integration/test_keeper_map_retries/test.py new file mode 100644 index 00000000000..352119147cd --- /dev/null +++ b/tests/integration/test_keeper_map_retries/test.py @@ -0,0 +1,78 @@ +import pytest + +from helpers.cluster import ClickHouseCluster + +import os + +CONFIG_DIR = os.path.join(os.path.dirname(os.path.realpath(__file__)), "configs") + +cluster = ClickHouseCluster(__file__) + +node = cluster.add_instance( + "node", + main_configs=["configs/enable_keeper_map.xml"], + with_zookeeper=True, + stay_alive=True, +) + + +def start_clean_clickhouse(): + # remove fault injection if present + if "fault_injection.xml" in node.exec_in_container( + ["bash", "-c", "ls /etc/clickhouse-server/config.d"] + ): + print("Removing fault injection") + node.exec_in_container( + ["bash", "-c", "rm /etc/clickhouse-server/config.d/fault_injection.xml"] + ) + node.restart_clickhouse() + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + yield cluster + + finally: + cluster.shutdown() + + +def repeat_query(query, repeat): + for _ in range(repeat): + node.query( + query, + settings={ + "keeper_max_retries": 20, + "keeper_retry_max_backoff_ms": 10000, + }, + ) + + +def test_queries(started_cluster): + start_clean_clickhouse() + + node.query("DROP TABLE IF EXISTS keeper_map_retries SYNC") + node.query( + "CREATE TABLE keeper_map_retries (a UInt64, b UInt64) Engine=KeeperMap('/keeper_map_retries') PRIMARY KEY a" + ) + + node.stop_clickhouse() + node.copy_file_to_container( + os.path.join(CONFIG_DIR, "fault_injection.xml"), + "/etc/clickhouse-server/config.d/fault_injection.xml", + ) + node.start_clickhouse() + + repeat_count = 10 + + repeat_query( + "INSERT INTO keeper_map_retries SELECT number, number FROM numbers(500)", + repeat_count, + ) + repeat_query("SELECT * FROM keeper_map_retries", repeat_count) + repeat_query( + "ALTER TABLE keeper_map_retries UPDATE b = 3 WHERE a > 2", repeat_count + ) + repeat_query("ALTER TABLE keeper_map_retries DELETE WHERE a > 2", repeat_count) + repeat_query("TRUNCATE keeper_map_retries", repeat_count) diff --git a/tests/queries/0_stateless/02911_backup_restore_keeper_map.sh b/tests/queries/0_stateless/02911_backup_restore_keeper_map.sh index ee070b40f6f..c04667505c3 100755 --- a/tests/queries/0_stateless/02911_backup_restore_keeper_map.sh +++ b/tests/queries/0_stateless/02911_backup_restore_keeper_map.sh @@ -13,20 +13,9 @@ $CLICKHOUSE_CLIENT -nm -q " CREATE TABLE $database_name.02911_backup_restore_keeper_map3 (key UInt64, value String) Engine=KeeperMap('/' || currentDatabase() || '/test02911_different') PRIMARY KEY key; " -# KeeperMap table engine doesn't have internal retries for interaction with Keeper. Do it on our own, otherwise tests with overloaded server can be flaky. 
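With retries now built into the KeeperMap engine itself, the stateless test no longer needs a shell-level retry loop; a test that wants extra headroom can raise the same per-query settings that the integration test above passes. A minimal sketch, assuming `keeper_max_retries` and `keeper_retry_max_backoff_ms` are accepted as ordinary query-level settings (as in that test):

```
# Sketch: rely on the engine's internal Keeper retries instead of looping in the shell.
$CLICKHOUSE_CLIENT -nm -q "
    SET keeper_max_retries = 20, keeper_retry_max_backoff_ms = 10000;
    INSERT INTO $database_name.02911_backup_restore_keeper_map2 SELECT number, 'test' || toString(number) FROM system.numbers LIMIT 5000;
"
```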
-while true -do - $CLICKHOUSE_CLIENT -nm -q "INSERT INTO $database_name.02911_backup_restore_keeper_map2 SELECT number, 'test' || toString(number) FROM system.numbers LIMIT 5000; - " 2>&1 | grep -q "KEEPER_EXCEPTION" && sleep 1 && continue - break -done +$CLICKHOUSE_CLIENT -nm -q "INSERT INTO $database_name.02911_backup_restore_keeper_map2 SELECT number, 'test' || toString(number) FROM system.numbers LIMIT 5000;" -while true -do - $CLICKHOUSE_CLIENT -nm -q "INSERT INTO $database_name.02911_backup_restore_keeper_map3 SELECT number, 'test' || toString(number) FROM system.numbers LIMIT 3000; - " 2>&1 | grep -q "KEEPER_EXCEPTION" && sleep 1 && continue - break -done +$CLICKHOUSE_CLIENT -nm -q "INSERT INTO $database_name.02911_backup_restore_keeper_map3 SELECT number, 'test' || toString(number) FROM system.numbers LIMIT 3000;" backup_path="$database_name" for i in $(seq 1 3); do From fd075470d6e141d5aa4d01ccefcc3d3ee04130c7 Mon Sep 17 00:00:00 2001 From: divanik Date: Tue, 30 Jul 2024 11:54:00 +0000 Subject: [PATCH 463/661] Add docs --- tests/integration/README.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tests/integration/README.md b/tests/integration/README.md index cde4cb05aec..d5137a9c148 100644 --- a/tests/integration/README.md +++ b/tests/integration/README.md @@ -130,6 +130,14 @@ docker build -t clickhouse/integration-test . ``` The helper container used by the `runner` script is in `docker/test/integration/runner/Dockerfile`. +It can be rebuild with + +``` +cd docker/test/integration/runner +docker build -t clickhouse/integration-test-runner . +``` + +Also you need to add option --network=host if you rebuild image for a local integration testsing. ### Adding new tests From fd26672864a7e1557908b878d7daa018de20c61a Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Tue, 30 Jul 2024 13:54:30 +0200 Subject: [PATCH 464/661] Revert some change --- src/Storages/StorageKeeperMap.cpp | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/src/Storages/StorageKeeperMap.cpp b/src/Storages/StorageKeeperMap.cpp index 09c21ae28f5..1559b442e43 100644 --- a/src/Storages/StorageKeeperMap.cpp +++ b/src/Storages/StorageKeeperMap.cpp @@ -35,7 +35,6 @@ #include #include -#include "Common/ZooKeeper/ZooKeeperRetries.h" #include #include #include @@ -44,6 +43,7 @@ #include #include #include +#include #include #include @@ -205,12 +205,12 @@ public: for (const auto & [key, _] : new_values) key_paths.push_back(storage.fullPathForKey(key)); - zkutil::ZooKeeper::MultiTryGetResponse results; + zkutil::ZooKeeper::MultiExistsResponse results; if constexpr (!for_update) { if (!strict) - results = zookeeper->tryGet(key_paths); + results = zookeeper->exists(key_paths); } Coordination::Requests requests; @@ -231,8 +231,7 @@ public: { if (!strict && results[i].error == Coordination::Error::ZOK) { - if (results[i].data != new_values[key]) - requests.push_back(zkutil::makeSetRequest(key_paths[i], new_values[key], -1)); + requests.push_back(zkutil::makeSetRequest(key_paths[i], new_values[key], -1)); } else { @@ -242,9 +241,6 @@ public: } } - if (requests.empty()) - return; - if (new_keys_num != 0) { auto will_be = current_keys_num + new_keys_num; From 0124d211ec81a3779fe0e99c868fe85be8856629 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Tue, 30 Jul 2024 14:00:52 +0200 Subject: [PATCH 465/661] Better --- tests/integration/test_replicated_table_attach/test.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git 
a/tests/integration/test_replicated_table_attach/test.py b/tests/integration/test_replicated_table_attach/test.py index 499220def2c..de60b7ec291 100644 --- a/tests/integration/test_replicated_table_attach/test.py +++ b/tests/integration/test_replicated_table_attach/test.py @@ -41,6 +41,7 @@ def start_clean_clickhouse(): def test_startup_with_small_bg_pool(started_cluster): start_clean_clickhouse() + node.query("DROP TABLE IF EXISTS replicated_table SYNC") node.query( "CREATE TABLE replicated_table (k UInt64, i32 Int32) ENGINE=ReplicatedMergeTree('/clickhouse/replicated_table', 'r1') ORDER BY k" ) @@ -54,11 +55,10 @@ def test_startup_with_small_bg_pool(started_cluster): node.restart_clickhouse(stop_start_wait_sec=10) assert_values() - node.query("DROP TABLE replicated_table SYNC") - def test_startup_with_small_bg_pool_partitioned(started_cluster): start_clean_clickhouse() + node.query("DROP TABLE IF EXISTS replicated_table_partitioned SYNC") node.query( "CREATE TABLE replicated_table_partitioned (k UInt64, i32 Int32) ENGINE=ReplicatedMergeTree('/clickhouse/replicated_table_partitioned', 'r1') ORDER BY k" ) @@ -81,5 +81,3 @@ def test_startup_with_small_bg_pool_partitioned(started_cluster): # check that we activate it in the end node.query_with_retry("INSERT INTO replicated_table_partitioned VALUES(20, 30)") - - node.query("DROP TABLE replicated_table_partitioned SYNC") From de99ee1b05e68b964535664d4197afd0944d0261 Mon Sep 17 00:00:00 2001 From: divanik Date: Tue, 30 Jul 2024 12:07:31 +0000 Subject: [PATCH 466/661] Change docs --- tests/integration/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/README.md b/tests/integration/README.md index d5137a9c148..c1eb511fa44 100644 --- a/tests/integration/README.md +++ b/tests/integration/README.md @@ -137,7 +137,7 @@ cd docker/test/integration/runner docker build -t clickhouse/integration-test-runner . ``` -Also you need to add option --network=host if you rebuild image for a local integration testsing. +If your docker configuration doesn't allow access to the public internet from the docker build command, you may also need to add the option --network=host when you rebuild the image for local integration testing.
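For example, the rebuild command from the snippet above would then be invoked as follows (a sketch; only the extra option is new, the image tag is unchanged):

```
cd docker/test/integration/runner
docker build --network=host -t clickhouse/integration-test-runner .
```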
### Adding new tests From c3e8825c8f14bb82f60c41754a021813d3dbc8aa Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Tue, 30 Jul 2024 14:23:32 +0200 Subject: [PATCH 467/661] Use correct order of fields in StorageURLSource --- src/Storages/StorageURL.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Storages/StorageURL.h b/src/Storages/StorageURL.h index 63d01a02417..6c95cad474c 100644 --- a/src/Storages/StorageURL.h +++ b/src/Storages/StorageURL.h @@ -228,12 +228,12 @@ private: bool need_only_count; size_t total_rows_in_file = 0; + Poco::Net::HTTPBasicCredentials credentials; + std::unique_ptr read_buf; std::shared_ptr input_format; std::unique_ptr pipeline; std::unique_ptr reader; - - Poco::Net::HTTPBasicCredentials credentials; }; class StorageURLSink : public SinkToStorage From 8b52d7b711d54f1d4bb5b2f39bf4aea3966f64dc Mon Sep 17 00:00:00 2001 From: Michael Stetsyuk Date: Tue, 30 Jul 2024 13:35:19 +0100 Subject: [PATCH 468/661] fxs --- src/IO/S3/Client.cpp | 19 +++++++++---------- src/IO/S3/Client.h | 2 +- .../test_checking_s3_blobs_paranoid/test.py | 12 ++++++++++++ tests/integration/test_storage_delta/test.py | 12 ++++++++++++ 4 files changed, 34 insertions(+), 11 deletions(-) diff --git a/src/IO/S3/Client.cpp b/src/IO/S3/Client.cpp index 7196dfa9bdc..8f037ea71be 100644 --- a/src/IO/S3/Client.cpp +++ b/src/IO/S3/Client.cpp @@ -385,10 +385,9 @@ Model::HeadObjectOutcome Client::HeadObject(HeadObjectRequest & request) const request.overrideURI(std::move(*bucket_uri)); - if (isClientForDisk() && error.GetErrorType() == Aws::S3::S3Errors::NO_SUCH_KEY) - CurrentMetrics::add(CurrentMetrics::S3DiskNoKeyErrors); - - return enrichErrorMessage( + /// The next call is NOT a recurcive call + /// This is a virtuall call Aws::S3::S3Client::HeadObject(const Model::HeadObjectRequest&) + return processRequestResult( HeadObject(static_cast(request))); } @@ -409,11 +408,8 @@ Model::ListObjectsOutcome Client::ListObjects(ListObjectsRequest & request) cons Model::GetObjectOutcome Client::GetObject(GetObjectRequest & request) const { - auto resp = doRequest(request, [this](const Model::GetObjectRequest & req) { return GetObject(req); }); - if (!resp.IsSuccess() && isClientForDisk() && resp.GetError().GetErrorType() == Aws::S3::S3Errors::NO_SUCH_KEY) - CurrentMetrics::add(CurrentMetrics::S3DiskNoKeyErrors); - - return enrichErrorMessage(std::move(resp)); + return processRequestResult( + doRequest(request, [this](const Model::GetObjectRequest & req) { return GetObject(req); })); } Model::AbortMultipartUploadOutcome Client::AbortMultipartUpload(AbortMultipartUploadRequest & request) const @@ -699,11 +695,14 @@ Client::doRequestWithRetryNetworkErrors(RequestType & request, RequestFn request } template -RequestResult Client::enrichErrorMessage(RequestResult && outcome) const +RequestResult Client::processRequestResult(RequestResult && outcome) const { if (outcome.IsSuccess() || !isClientForDisk()) return std::forward(outcome); + if (outcome.GetError().GetErrorType() == Aws::S3::S3Errors::NO_SUCH_KEY) + CurrentMetrics::add(CurrentMetrics::S3DiskNoKeyErrors); + String enriched_message = fmt::format( "{} {}", outcome.GetError().GetMessage(), diff --git a/src/IO/S3/Client.h b/src/IO/S3/Client.h index 11cace4e1fd..e54953419e1 100644 --- a/src/IO/S3/Client.h +++ b/src/IO/S3/Client.h @@ -274,7 +274,7 @@ private: void insertRegionOverride(const std::string & bucket, const std::string & region) const; template - RequestResult enrichErrorMessage(RequestResult && outcome) const; + RequestResult 
processRequestResult(RequestResult && outcome) const; String initial_endpoint; std::shared_ptr credentials_provider; diff --git a/tests/integration/test_checking_s3_blobs_paranoid/test.py b/tests/integration/test_checking_s3_blobs_paranoid/test.py index 1ed70e20b79..dde636b5d29 100644 --- a/tests/integration/test_checking_s3_blobs_paranoid/test.py +++ b/tests/integration/test_checking_s3_blobs_paranoid/test.py @@ -689,3 +689,15 @@ def test_no_key_found_disk(cluster, broken_s3): "DB::Exception: The specified key does not exist. This error happened for S3 disk." in error ) + + s3_disk_no_key_errors_metric_value = int( + node.query( + """ + SELECT value + FROM system.metrics + WHERE metric = 'S3DiskNoKeyErrors' + """ + ).strip() + ) + + assert s3_disk_no_key_errors_metric_value > 0 diff --git a/tests/integration/test_storage_delta/test.py b/tests/integration/test_storage_delta/test.py index d3dd7cfe52a..67cc7cdd6da 100644 --- a/tests/integration/test_storage_delta/test.py +++ b/tests/integration/test_storage_delta/test.py @@ -452,6 +452,18 @@ def test_restart_broken(started_cluster): f"SELECT count() FROM {TABLE_NAME}" ) + s3_disk_no_key_errors_metric_value = int( + instance.query( + """ + SELECT value + FROM system.metrics + WHERE metric = 'S3DiskNoKeyErrors' + """ + ).strip() + ) + + assert s3_disk_no_key_errors_metric_value == 0 + minio_client.make_bucket(bucket) upload_directory(minio_client, bucket, f"/{TABLE_NAME}", "") From d69f6cccde7633214eba48c08d7647e4ea9a40da Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 30 Jul 2024 15:08:26 +0200 Subject: [PATCH 469/661] Fix --- src/Common/ThreadPoolTaskTracker.cpp | 5 ++++- src/IO/WriteBufferFromS3.cpp | 9 ++++----- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/src/Common/ThreadPoolTaskTracker.cpp b/src/Common/ThreadPoolTaskTracker.cpp index 61d34801f7a..1697a13f780 100644 --- a/src/Common/ThreadPoolTaskTracker.cpp +++ b/src/Common/ThreadPoolTaskTracker.cpp @@ -19,6 +19,10 @@ TaskTracker::TaskTracker(ThreadPoolCallbackRunnerUnsafe scheduler_, size_t TaskTracker::~TaskTracker() { + /// Tasks should be waited outside of dtor. + /// Important for WriteBufferFromS3/AzureBlobStorage, where TaskTracker is currently used. + chassert(finished_futures.empty() && futures.empty()); + safeWaitAll(); } @@ -170,4 +174,3 @@ bool TaskTracker::isAsync() const } } - diff --git a/src/IO/WriteBufferFromS3.cpp b/src/IO/WriteBufferFromS3.cpp index 3682e49b018..e702b4d35ad 100644 --- a/src/IO/WriteBufferFromS3.cpp +++ b/src/IO/WriteBufferFromS3.cpp @@ -277,12 +277,10 @@ WriteBufferFromS3::~WriteBufferFromS3() "The file might not be written to S3. " "{}.", getVerboseLogDetails()); - return; } - - /// That destructor could be call with finalized=false in case of exceptions - if (!finalized && !canceled) + else if (!finalized) { + /// That destructor could be call with finalized=false in case of exceptions LOG_INFO( log, "WriteBufferFromS3 is not finalized in destructor. " @@ -291,9 +289,10 @@ WriteBufferFromS3::~WriteBufferFromS3() getVerboseLogDetails()); } + /// Wait for all tasks, because they contain reference to this write buffer. task_tracker->safeWaitAll(); - if (!multipart_upload_id.empty() && !multipart_upload_finished) + if (!canceled && !multipart_upload_id.empty() && !multipart_upload_finished) { LOG_WARNING(log, "WriteBufferFromS3 was neither finished nor aborted, try to abort upload in destructor. 
{}.", getVerboseLogDetails()); tryToAbortMultipartUpload(); From 20faed85ca30c6352fd091e8d4d763fb98fe1311 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Tue, 30 Jul 2024 15:18:37 +0200 Subject: [PATCH 470/661] Remove useless file --- tests/queries/0_stateless/data_minio/test :: 03215_archive.csv | 1 - 1 file changed, 1 deletion(-) delete mode 100644 tests/queries/0_stateless/data_minio/test :: 03215_archive.csv diff --git a/tests/queries/0_stateless/data_minio/test :: 03215_archive.csv b/tests/queries/0_stateless/data_minio/test :: 03215_archive.csv deleted file mode 100644 index d00491fd7e5..00000000000 --- a/tests/queries/0_stateless/data_minio/test :: 03215_archive.csv +++ /dev/null @@ -1 +0,0 @@ -1 From 21aa514c80cb463f079f9877ae97048a8b13dfbe Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Tue, 30 Jul 2024 15:03:09 +0100 Subject: [PATCH 471/661] don't run removed tests --- tests/performance/scripts/entrypoint.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/performance/scripts/entrypoint.sh b/tests/performance/scripts/entrypoint.sh index 0c3bfa550f4..db7d96ad150 100755 --- a/tests/performance/scripts/entrypoint.sh +++ b/tests/performance/scripts/entrypoint.sh @@ -118,7 +118,7 @@ then # far in the future and have unrelated test changes. base=$(git -C right/ch merge-base pr origin/master) git -C right/ch diff --name-only "$base" pr -- . | tee all-changed-files.txt - git -C right/ch diff --name-only "$base" pr -- tests/performance/*.xml | tee changed-test-definitions.txt + git -C right/ch diff --name-only --diff-filter=d "$base" pr -- tests/performance/*.xml | tee changed-test-definitions.txt git -C right/ch diff --name-only "$base" pr -- :!tests/performance/*.xml :!docker/test/performance-comparison | tee other-changed-files.txt fi From 7d5c30e76cf0fd17515803fec96899f4aad1294e Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Tue, 30 Jul 2024 16:26:19 +0200 Subject: [PATCH 472/661] No retries when partitioned --- tests/integration/test_keeper_map/test.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/tests/integration/test_keeper_map/test.py b/tests/integration/test_keeper_map/test.py index 31316af7b1e..7aee5df5746 100644 --- a/tests/integration/test_keeper_map/test.py +++ b/tests/integration/test_keeper_map/test.py @@ -46,7 +46,11 @@ def assert_keeper_exception_after_partition(query): with PartitionManager() as pm: pm.drop_instance_zk_connections(node) try: - error = node.query_and_get_error_with_retry(query, sleep_time=1) + error = node.query_and_get_error_with_retry( + query, + sleep_time=1, + settings={"insert_keeper_max_retries": 1, "keeper_max_retries": 1}, + ) assert "Coordination::Exception" in error except: print_iptables_rules() @@ -84,7 +88,9 @@ def test_keeper_map_without_zk(started_cluster): node.restart_clickhouse(60) try: error = node.query_and_get_error_with_retry( - "SELECT * FROM test_keeper_map_without_zk", sleep_time=1 + "SELECT * FROM test_keeper_map_without_zk", + sleep_time=1, + settings={"keeper_max_retries": 1}, ) assert "Failed to activate table because of connection issues" in error except: From 83c6d97cd2aae0a3f79c2776ffc3a5691f8fd4bb Mon Sep 17 00:00:00 2001 From: maxvostrikov Date: Tue, 30 Jul 2024 17:34:38 +0200 Subject: [PATCH 473/661] squash! 
added some tests in relation with https://github.com/ClickHouse/ClickHouse/pull/54881 with new behaviour when enable_named_columns_in_function_tuple=1 (default value) --- .../0_stateless/00309_formats.reference | Bin 20353 -> 18666 bytes tests/queries/0_stateless/00309_formats.sql | 5 +++++ 2 files changed, 5 insertions(+) diff --git a/tests/queries/0_stateless/00309_formats.reference b/tests/queries/0_stateless/00309_formats.reference index cab311692be229716b58af39079275d3942b01cc..f3ea45520bb50fb936caf6724e9fedf3cdd00b75 100644 GIT binary patch (binary delta data omitted)
diff --git a/tests/queries/0_stateless/00309_formats.sql b/tests/queries/0_stateless/00309_formats.sql index b0939c00a10..0366cdeea5c 100644 --- a/tests/queries/0_stateless/00309_formats.sql +++ b/tests/queries/0_stateless/00309_formats.sql @@ -9,3 +9,8 @@ SELECT number * 246 + 10 AS n, toDate('2000-01-01') + n AS d, range(n) AS arr, a SELECT number * 246 + 10 AS n, toDate('2000-01-01') + n AS d, range(n) AS arr, arrayStringConcat(arrayMap(x -> reinterpretAsString(x), arr)) AS s, (n, d) AS tuple FROM system.numbers LIMIT 2 FORMAT JSON; SELECT number * 246 + 10 AS n, toDate('2000-01-01') + n AS d, range(n) AS arr, arrayStringConcat(arrayMap(x -> reinterpretAsString(x), arr)) AS s, (n, d) AS tuple FROM system.numbers LIMIT 2 FORMAT JSONCompact; SELECT number * 246 + 10 AS n, toDate('2000-01-01') + n AS d, range(n) AS arr, arrayStringConcat(arrayMap(x -> reinterpretAsString(x), arr)) AS s, (n, d) AS tuple FROM system.numbers LIMIT 2 FORMAT XML; + +SET enable_named_columns_in_function_tuple = 1; + +SELECT 36 AS n, toDate('2000-01-01') + n AS d, (n, d) AS tuple FROM system.numbers LIMIT 1 FORMAT RowBinaryWithNamesAndTypes; +SELECT number * 246 + 10 AS n, toDate('2000-01-01') + n AS d, (n, d) AS tuple FROM system.numbers LIMIT 1 FORMAT TabSeparatedWithNamesAndTypes; From aa26291ff25b16539efa3d50a540fc32a05b702d Mon Sep 17 00:00:00 2001 From: Michael Stetsyuk Date: Tue, 30 Jul 2024 18:15:24 +0100 Subject: [PATCH 474/661] fxs --- src/Databases/DatabaseReplicated.cpp | 57 +++++++++++++++---- src/Databases/DatabaseReplicatedWorker.cpp | 21 +++++-- src/Databases/DatabaseReplicatedWorker.h | 4 +- src/Storages/System/StorageSystemClusters.cpp | 6 +- .../test_recovery_time_metric/test.py | 36 ++++++++++-- 5 files changed, 98 insertions(+), 26 deletions(-) diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp index b11b9382732..06cea65d62e 100644 --- a/src/Databases/DatabaseReplicated.cpp +++ b/src/Databases/DatabaseReplicated.cpp @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -340,31 +341,63 @@ ClusterPtr DatabaseReplicated::getClusterImpl(bool all_groups) const ReplicasInfo DatabaseReplicated::tryGetReplicasInfo(const ClusterPtr & cluster_) const { - ReplicasInfo res; + Strings paths_get, paths_exists; +
paths_get.emplace_back(fs::path(zookeeper_path) / "max_log_ptr"); - auto zookeeper = getZooKeeper(); const auto & addresses_with_failover = cluster_->getShardsAddresses(); const auto & shards_info = cluster_->getShardsInfo(); - + for (size_t shard_index = 0; shard_index < shards_info.size(); ++shard_index) + { + for (const auto & replica : addresses_with_failover[shard_index]) + { + String full_name = getFullReplicaName(replica.database_shard_name, replica.database_replica_name); + paths_exists.emplace_back(fs::path(zookeeper_path) / "replicas" / full_name / "active"); + paths_get.emplace_back(fs::path(zookeeper_path) / "replicas" / full_name / "log_ptr"); + } + } + try { - UInt32 max_log_ptr = parse(zookeeper->get(zookeeper_path + "/max_log_ptr")); + auto current_zookeeper = getZooKeeper(); + auto get_res = current_zookeeper->get(paths_get); + auto exist_res = current_zookeeper->exists(paths_exists); + chassert(get_res.size() == exist_res.size() + 1); + auto max_log_ptr_zk = get_res[0]; + if (max_log_ptr_zk.error != Coordination::Error::ZOK) + throw Coordination::Exception(max_log_ptr_zk.error); + + UInt32 max_log_ptr = parse(max_log_ptr_zk.data); + + ReplicasInfo replicas_info; + replicas_info.resize(exist_res.size()); + + size_t global_replica_index = 0; for (size_t shard_index = 0; shard_index < shards_info.size(); ++shard_index) { for (const auto & replica : addresses_with_failover[shard_index]) { - String full_name = getFullReplicaName(replica.database_shard_name, replica.database_replica_name); - UInt32 log_ptr = parse(zookeeper->get(fs::path(zookeeper_path) / "replicas" / full_name / "log_ptr")); - bool is_active = zookeeper->exists(fs::path(zookeeper_path) / "replicas" / full_name / "active"); - res.push_back(ReplicaInfo{ - .is_active = is_active, - .replication_lag = max_log_ptr - log_ptr, + auto replica_active = exist_res[global_replica_index]; + auto replica_log_ptr = get_res[global_replica_index + 1]; + + if (replica_active.error != Coordination::Error::ZOK && replica_active.error != Coordination::Error::ZNONODE) + throw Coordination::Exception(replica_active.error); + + if (replica_log_ptr.error != Coordination::Error::ZOK) + throw Coordination::Exception(replica_log_ptr.error); + + replicas_info[global_replica_index] = ReplicaInfo{ + .is_active = replica_active.error == Coordination::Error::ZOK, + .replication_lag = max_log_ptr - parse(replica_log_ptr.data), .recovery_time = replica.is_local ? ddl_worker->getCurrentInitializationDurationMs() : 0, - }); + }; + + ++global_replica_index; } } - return res; + + return replicas_info; } catch (...) 
{ tryLogCurrentException(log); diff --git a/src/Databases/DatabaseReplicatedWorker.cpp b/src/Databases/DatabaseReplicatedWorker.cpp index a9a74c5f56a..4e7408aa96e 100644 --- a/src/Databases/DatabaseReplicatedWorker.cpp +++ b/src/Databases/DatabaseReplicatedWorker.cpp @@ -32,8 +32,11 @@ DatabaseReplicatedDDLWorker::DatabaseReplicatedDDLWorker(DatabaseReplicated * db bool DatabaseReplicatedDDLWorker::initializeMainThread() { - initialization_duration_timer.restart(); - initializing.store(true, std::memory_order_release); + { + std::lock_guard lock(initialization_duration_timer_mutex); + initialization_duration_timer.emplace(); + initialization_duration_timer->start(); + } while (!stop_flag) { @@ -72,7 +75,10 @@ bool DatabaseReplicatedDDLWorker::initializeMainThread() initializeReplication(); initialized = true; - initializing.store(false, std::memory_order_relaxed); + { + std::lock_guard lock(initialization_duration_timer_mutex); + initialization_duration_timer.reset(); + } return true; } catch (...) @@ -82,7 +88,11 @@ bool DatabaseReplicatedDDLWorker::initializeMainThread() } } - initializing.store(false, std::memory_order_relaxed); + { + std::lock_guard lock(initialization_duration_timer_mutex); + initialization_duration_timer.reset(); + } + return false; } @@ -466,7 +476,8 @@ UInt32 DatabaseReplicatedDDLWorker::getLogPointer() const UInt64 DatabaseReplicatedDDLWorker::getCurrentInitializationDurationMs() const { - return initializing.load(std::memory_order_acquire) ? initialization_duration_timer.elapsedMilliseconds() : 0; + std::lock_guard lock(initialization_duration_timer_mutex); + return initialization_duration_timer ? initialization_duration_timer->elapsedMilliseconds() : 0; } } diff --git a/src/Databases/DatabaseReplicatedWorker.h b/src/Databases/DatabaseReplicatedWorker.h index 3e5887be825..2309c831839 100644 --- a/src/Databases/DatabaseReplicatedWorker.h +++ b/src/Databases/DatabaseReplicatedWorker.h @@ -59,8 +59,8 @@ private: /// It will remove "active" node when database is detached zkutil::EphemeralNodeHolderPtr active_node_holder; - Stopwatch initialization_duration_timer; - std::atomic initializing = false; + std::optional initialization_duration_timer; + mutable std::mutex initialization_duration_timer_mutex; }; } diff --git a/src/Storages/System/StorageSystemClusters.cpp b/src/Storages/System/StorageSystemClusters.cpp index 0da4bd70cbd..4b9802c9089 100644 --- a/src/Storages/System/StorageSystemClusters.cpp +++ b/src/Storages/System/StorageSystemClusters.cpp @@ -71,7 +71,7 @@ void StorageSystemClusters::writeCluster(MutableColumns & res_columns, const Nam const auto & shards_info = cluster->getShardsInfo(); const auto & addresses_with_failover = cluster->getShardsAddresses(); - size_t replica_idx = 0; + size_t global_replica_idx = 0; for (size_t shard_index = 0; shard_index < shards_info.size(); ++shard_index) { const auto & shard_info = shards_info[shard_index]; @@ -108,7 +108,7 @@ void StorageSystemClusters::writeCluster(MutableColumns & res_columns, const Nam } else { - const auto & replica_info = replicas_info[replica_idx++]; + const auto & replica_info = replicas_info[global_replica_idx]; res_columns[i++]->insert(replica_info.is_active); res_columns[i++]->insert(replica_info.replication_lag); if (replica_info.recovery_time != 0) @@ -116,6 +116,8 @@ void StorageSystemClusters::writeCluster(MutableColumns & res_columns, const Nam else res_columns[i++]->insertDefault(); } + + ++global_replica_idx; } } } diff --git a/tests/integration/test_recovery_time_metric/test.py 
b/tests/integration/test_recovery_time_metric/test.py index 4dad844b950..8f369d7759c 100644 --- a/tests/integration/test_recovery_time_metric/test.py +++ b/tests/integration/test_recovery_time_metric/test.py @@ -5,7 +5,6 @@ cluster = ClickHouseCluster(__file__) node = cluster.add_instance( "node", main_configs=["configs/config.xml"], - with_zookeeper=True, stay_alive=True, ) @@ -21,9 +20,36 @@ def start_cluster(): def test_recovery_time_metric(start_cluster): node.query( - "CREATE DATABASE rdb ENGINE = Replicated('/test/test_recovery_time_metric', 'shard1', 'replica1');" + """ + CREATE DATABASE rdb + ENGINE = Replicated('/test/test_recovery_time_metric', 'shard1', 'replica1') + """ ) - node.query("CREATE TABLE rdb.t (x UInt32) ENGINE = MergeTree ORDER BY x;") - node.exec_in_container(["bash", "-c", "rm /var/lib/clickhouse/metadata/rdb/t.sql"]) + + node.query( + """ + CREATE TABLE rdb.t + ( + `x` UInt32 + ) + ENGINE = MergeTree + ORDER BY x + """ + ) + + node.exec_in_container( + ["bash", "-c", "rm /var/lib/clickhouse/metadata/rdb/t.sql"] + ) + node.restart_clickhouse() - assert node.query("SELECT any(recovery_time) FROM system.clusters;") != "0\n" + + ret = int( + node.query( + """ + SELECT recovery_time + FROM system.clusters + WHERE cluster = 'rdb' + """ + ).strip() + ) + assert ret > 0 From 7e51e9962c34320a9c60ba6abcf8b38cf517e86c Mon Sep 17 00:00:00 2001 From: Alex Katsman Date: Tue, 30 Jul 2024 17:20:07 +0000 Subject: [PATCH 475/661] Fix WriteBuffer destructor when finalize has failed for MergeTreeDeduplicationLog::shutdown --- src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp b/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp index 22ff9b7194f..a8110500f13 100644 --- a/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp +++ b/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp @@ -341,15 +341,19 @@ void MergeTreeDeduplicationLog::shutdown() stopped = true; if (current_writer) { + /// If an error has occurred during finalize, we'd like to have the exception set for reset. + /// Otherwise, we'll be in a situation when a finalization didn't happen, and we didn't get + /// any error, causing logical error (see ~MemoryBuffer()). try { current_writer->finalize(); + current_writer.reset(); } catch (...) 
{ tryLogCurrentException(__PRETTY_FUNCTION__); + current_writer.reset(); } - current_writer.reset(); } } From 51af0d305c9959fb4870bd8a57035d48207648b2 Mon Sep 17 00:00:00 2001 From: Alexander Gololobov Date: Tue, 30 Jul 2024 20:00:23 +0200 Subject: [PATCH 476/661] Reduce number of tested combinations --- tests/queries/0_stateless/02473_multistep_prewhere.python | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02473_multistep_prewhere.python b/tests/queries/0_stateless/02473_multistep_prewhere.python index a942568233c..11095202039 100644 --- a/tests/queries/0_stateless/02473_multistep_prewhere.python +++ b/tests/queries/0_stateless/02473_multistep_prewhere.python @@ -193,7 +193,7 @@ def main(): url = os.environ["CLICKHOUSE_URL"] + "&max_threads=1" default_index_granularity = 10 - total_rows = 8 * default_index_granularity + total_rows = 7 * default_index_granularity step = default_index_granularity session = requests.Session() for index_granularity in [ From dacf044c3dee65d799242b7f4846f7d6d8b2bd34 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Tue, 30 Jul 2024 19:20:52 +0000 Subject: [PATCH 477/661] Update version_date.tsv and changelogs after v24.7.1.2915-stable --- SECURITY.md | 3 +- docker/keeper/Dockerfile | 2 +- docker/server/Dockerfile.alpine | 2 +- docker/server/Dockerfile.ubuntu | 2 +- docs/changelogs/v24.7.1.2915-stable.md | 524 +++++++++++++++++++++++++ utils/list-versions/version_date.tsv | 1 + 6 files changed, 530 insertions(+), 4 deletions(-) create mode 100644 docs/changelogs/v24.7.1.2915-stable.md diff --git a/SECURITY.md b/SECURITY.md index 53328b6e16b..8930dc96f8a 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -14,9 +14,10 @@ The following versions of ClickHouse server are currently supported with securit | Version | Supported | |:-|:-| +| 24.7 | ✔️ | | 24.6 | ✔️ | | 24.5 | ✔️ | -| 24.4 | ✔️ | +| 24.4 | ❌ | | 24.3 | ✔️ | | 24.2 | ❌ | | 24.1 | ❌ | diff --git a/docker/keeper/Dockerfile b/docker/keeper/Dockerfile index c59ef1b919a..e99c86267f9 100644 --- a/docker/keeper/Dockerfile +++ b/docker/keeper/Dockerfile @@ -34,7 +34,7 @@ RUN arch=${TARGETARCH:-amd64} \ # lts / testing / prestable / etc ARG REPO_CHANNEL="stable" ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}" -ARG VERSION="24.6.2.17" +ARG VERSION="24.7.1.2915" ARG PACKAGES="clickhouse-keeper" ARG DIRECT_DOWNLOAD_URLS="" diff --git a/docker/server/Dockerfile.alpine b/docker/server/Dockerfile.alpine index 240df79aeb1..fb562b911a3 100644 --- a/docker/server/Dockerfile.alpine +++ b/docker/server/Dockerfile.alpine @@ -32,7 +32,7 @@ RUN arch=${TARGETARCH:-amd64} \ # lts / testing / prestable / etc ARG REPO_CHANNEL="stable" ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}" -ARG VERSION="24.6.2.17" +ARG VERSION="24.7.1.2915" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" ARG DIRECT_DOWNLOAD_URLS="" diff --git a/docker/server/Dockerfile.ubuntu b/docker/server/Dockerfile.ubuntu index ac64655991a..51f4e6a0f40 100644 --- a/docker/server/Dockerfile.ubuntu +++ b/docker/server/Dockerfile.ubuntu @@ -28,7 +28,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list ARG REPO_CHANNEL="stable" ARG REPOSITORY="deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb ${REPO_CHANNEL} main" -ARG VERSION="24.6.2.17" +ARG VERSION="24.7.1.2915" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" #docker-official-library:off diff --git 
a/docs/changelogs/v24.7.1.2915-stable.md b/docs/changelogs/v24.7.1.2915-stable.md new file mode 100644 index 00000000000..abffbe58bfc --- /dev/null +++ b/docs/changelogs/v24.7.1.2915-stable.md @@ -0,0 +1,524 @@ +--- +sidebar_position: 1 +sidebar_label: 2024 +--- + +# 2024 Changelog + +### ClickHouse release v24.7.1.2915-stable (a37d2d43da7) FIXME as compared to v24.7.1.1-new (aa023477a92) + +#### Backward Incompatible Change +* Change binary serialization of Variant data type: add `compact` mode to avoid writing the same discriminator multiple times for granules with single variant or with only NULL values. Add MergeTree setting `use_compact_variant_discriminators_serialization` that is enabled by default. Note that Variant type is still experimental and backward-incompatible change in serialization is ok. [#62774](https://github.com/ClickHouse/ClickHouse/pull/62774) ([Kruglov Pavel](https://github.com/Avogar)). +* Forbid `CREATE MATERIALIZED VIEW ... ENGINE Replicated*MergeTree POPULATE AS SELECT ...` with Replicated databases. [#63963](https://github.com/ClickHouse/ClickHouse/pull/63963) ([vdimir](https://github.com/vdimir)). +* `clickhouse-keeper-client` will only accept paths in string literals, such as `ls '/hello/world'`, not bare strings such as `ls /hello/world`. [#65494](https://github.com/ClickHouse/ClickHouse/pull/65494) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Metric `KeeperOutstandingRequets` was renamed to `KeeperOutstandingRequests`. This fixes a typo reported in [#66179](https://github.com/ClickHouse/ClickHouse/issues/66179). [#66206](https://github.com/ClickHouse/ClickHouse/pull/66206) ([Robert Schulze](https://github.com/rschu1ze)). +* Remove `is_deterministic` field from the `system.functions` table. [#66630](https://github.com/ClickHouse/ClickHouse/pull/66630) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### New Feature +* Extend function `tuple` to construct named tuples in query. Introduce function `tupleNames` to extract names from tuples. [#54881](https://github.com/ClickHouse/ClickHouse/pull/54881) ([Amos Bird](https://github.com/amosbird)). +* `ASOF JOIN` support for `full_sorting_join` algorithm Close [#54493](https://github.com/ClickHouse/ClickHouse/issues/54493). [#55051](https://github.com/ClickHouse/ClickHouse/pull/55051) ([vdimir](https://github.com/vdimir)). +* A new table function, `fuzzQuery,` was added. This function allows you to modify a given query string with random variations. Example: `SELECT query FROM fuzzQuery('SELECT 1');`. [#62103](https://github.com/ClickHouse/ClickHouse/pull/62103) ([pufit](https://github.com/pufit)). +* Add new window function `percent_rank`. [#62747](https://github.com/ClickHouse/ClickHouse/pull/62747) ([lgbo](https://github.com/lgbo-ustc)). +* Support JWT authentication in `clickhouse-client`. [#62829](https://github.com/ClickHouse/ClickHouse/pull/62829) ([Konstantin Bogdanov](https://github.com/thevar1able)). +* Add SQL functions `changeYear`, `changeMonth`, `changeDay`, `changeHour`, `changeMinute`, `changeSecond`. For example, `SELECT changeMonth(toDate('2024-06-14'), 7)` returns date `2024-07-14`. [#63186](https://github.com/ClickHouse/ClickHouse/pull/63186) ([cucumber95](https://github.com/cucumber95)). +* Introduce startup scripts, which allow the execution of preconfigured queries at the startup stage. [#64889](https://github.com/ClickHouse/ClickHouse/pull/64889) ([pufit](https://github.com/pufit)). 
+* Support accept_invalid_certificate in client's config in order to allow for client to connect over secure TCP to a server running with self-signed certificate - can be used as a shorthand for corresponding `openSSL` client settings `verificationMode=none` + `invalidCertificateHandler.name=AcceptCertificateHandler`. [#65238](https://github.com/ClickHouse/ClickHouse/pull/65238) ([peacewalker122](https://github.com/peacewalker122)). +* Add system.error_log which contains history of error values from table system.errors, periodically flushed to disk. [#65381](https://github.com/ClickHouse/ClickHouse/pull/65381) ([Pablo Marcos](https://github.com/pamarcos)). +* Add aggregate function `groupConcat`. About the same as `arrayStringConcat( groupArray(column), ',')` Can receive 2 parameters: a string delimiter and the number of elements to be processed. [#65451](https://github.com/ClickHouse/ClickHouse/pull/65451) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* Add AzureQueue storage. [#65458](https://github.com/ClickHouse/ClickHouse/pull/65458) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Add a new setting to disable/enable writing page index into parquet files. [#65475](https://github.com/ClickHouse/ClickHouse/pull/65475) ([lgbo](https://github.com/lgbo-ustc)). +* Allow system administrators to configure `logger.console_log_level`. [#65559](https://github.com/ClickHouse/ClickHouse/pull/65559) ([Azat Khuzhin](https://github.com/azat)). +* Automatically append a wildcard `*` to the end of a directory path with table function `file`. [#66019](https://github.com/ClickHouse/ClickHouse/pull/66019) ([Zhidong (David) Guo](https://github.com/Gun9niR)). +* Add `--memory-usage` option to client in non interactive mode. [#66393](https://github.com/ClickHouse/ClickHouse/pull/66393) ([vdimir](https://github.com/vdimir)). + +#### Performance Improvement +* Enable `optimize_functions_to_subcolumns` by default. [#58661](https://github.com/ClickHouse/ClickHouse/pull/58661) ([Anton Popov](https://github.com/CurtizJ)). +* Replace int to string algorithm with a faster one (from a modified amdn/itoa to a modified jeaiii/itoa). [#61661](https://github.com/ClickHouse/ClickHouse/pull/61661) ([Raúl Marín](https://github.com/Algunenano)). +* Sizes of hash tables created by join (`parallel_hash` algorithm) is collected and cached now. This information will be used to preallocate space in hash tables for subsequent query executions and save time on hash table resizes. [#64553](https://github.com/ClickHouse/ClickHouse/pull/64553) ([Nikita Taranov](https://github.com/nickitat)). +* Optimized queries with `ORDER BY` primary key and `WHERE` that have a condition with high selectivity by using of buffering. It is controlled by setting `read_in_order_use_buffering` (enabled by default) and can increase memory usage of query. [#64607](https://github.com/ClickHouse/ClickHouse/pull/64607) ([Anton Popov](https://github.com/CurtizJ)). +* Improve performance of loading `plain_rewritable` metadata. [#65634](https://github.com/ClickHouse/ClickHouse/pull/65634) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Attaching tables on read-only disks will use fewer resources by not loading outdated parts. [#65635](https://github.com/ClickHouse/ClickHouse/pull/65635) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Support minmax hyperrectangle for Set indices. [#65676](https://github.com/ClickHouse/ClickHouse/pull/65676) ([AntiTopQuark](https://github.com/AntiTopQuark)). 
+* Unload primary index of outdated parts to reduce total memory usage. [#65852](https://github.com/ClickHouse/ClickHouse/pull/65852) ([Anton Popov](https://github.com/CurtizJ)). +* Functions `replaceRegexpAll` and `replaceRegexpOne` are now significantly faster if the pattern is trivial, i.e. contains no metacharacters, pattern classes, flags, grouping characters etc. (Thanks to Taiyang Li). [#66185](https://github.com/ClickHouse/ClickHouse/pull/66185) ([Robert Schulze](https://github.com/rschu1ze)). + +#### Improvement +* Support rocksdb as backend storage of keeper. [#56626](https://github.com/ClickHouse/ClickHouse/pull/56626) ([Han Fei](https://github.com/hanfei1991)). +* The setting `optimize_trivial_insert_select` is disabled by default. In most cases, it should be beneficial. Nevertheless, if you are seeing slower INSERT SELECT or increased memory usage, you can enable it back or `SET compatibility = '24.6'`. [#58970](https://github.com/ClickHouse/ClickHouse/pull/58970) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Print stacktrace and diagnostic info if `clickhouse-client` or `clickhouse-local` crashes. [#61109](https://github.com/ClickHouse/ClickHouse/pull/61109) ([Alexander Tokmakov](https://github.com/tavplubix)). +* The result of `SHOW INDEX | INDEXES | INDICES | KEYS` was previously sorted by the primary key column names. Since this was unintuitive, the result is now sorted by the position of the primary key columns within the primary key. [#61131](https://github.com/ClickHouse/ClickHouse/pull/61131) ([Robert Schulze](https://github.com/rschu1ze)). +* This PR changes how deduplication for MV works. Fixed a lot of cases like: - on destination table: data is split for 2 or more blocks and that blocks is considered as duplicate when that block is inserted in parallel. - on MV destination table: the equal blocks are deduplicated, that happens when MV often produces equal data as a result for different input data due to performing aggregation. - on MV destination table: the equal blocks which comes from different MV are deduplicated. [#61601](https://github.com/ClickHouse/ClickHouse/pull/61601) ([Sema Checherinda](https://github.com/CheSema)). +* Allow matching column names in a case insensitive manner when reading json files (`input_format_json_case_insensitive_column_matching`). [#61750](https://github.com/ClickHouse/ClickHouse/pull/61750) ([kevinyhzou](https://github.com/KevinyhZou)). +* Support reading partitioned data DeltaLake data. Infer DeltaLake schema by reading metadata instead of data. [#63201](https://github.com/ClickHouse/ClickHouse/pull/63201) ([Kseniia Sumarokova](https://github.com/kssenii)). +* In composable protocols TLS layer accepted only `certificateFile` and `privateKeyFile` parameters. https://clickhouse.com/docs/en/operations/settings/composable-protocols. [#63985](https://github.com/ClickHouse/ClickHouse/pull/63985) ([Anton Ivashkin](https://github.com/ianton-ru)). +* Make an interactive client for clickhouse-disks, add local disk from the local directory. Fixes [#56791](https://github.com/ClickHouse/ClickHouse/issues/56791). [#64446](https://github.com/ClickHouse/ClickHouse/pull/64446) ([Daniil Ivanik](https://github.com/divanik)). +* Added profile event `SelectQueriesWithPrimaryKeyUsage` which indicates how many SELECT queries use the primary key to evaluate the WHERE clause. [#64492](https://github.com/ClickHouse/ClickHouse/pull/64492) ([0x01f](https://github.com/0xfei)). +* `StorageS3Queue` related fixes and improvements. 
Deduce a default value of `s3queue_processing_threads_num` according to the number of physical cpu cores on the server (instead of the previous default value as 1). Set default value of `s3queue_loading_retries` to 10. Fix possible vague "Uncaught exception" in exception column of `system.s3queue`. Do not increment retry count on `MEMORY_LIMIT_EXCEEDED` exception. Move files commit to a stage after insertion into table fully finished to avoid files being commited while not inserted. Add settings `s3queue_max_processed_files_before_commit`, `s3queue_max_processed_rows_before_commit`, `s3queue_max_processed_bytes_before_commit`, `s3queue_max_processing_time_sec_before_commit`, to better control commit and flush time. [#65046](https://github.com/ClickHouse/ClickHouse/pull/65046) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fixed broken multiple columns aggregation on s390x. [#65062](https://github.com/ClickHouse/ClickHouse/pull/65062) ([Harry Lee](https://github.com/HarryLeeIBM)). +* Support aliases in parametrized view function (only new analyzer). [#65190](https://github.com/ClickHouse/ClickHouse/pull/65190) ([Kseniia Sumarokova](https://github.com/kssenii)). +* S3. reduce retires time for queries, increase retries count for backups. 8.5 minutes and 100 retires for queries, 1.2 hours and 1000 retries for backup restore. [#65232](https://github.com/ClickHouse/ClickHouse/pull/65232) ([Sema Checherinda](https://github.com/CheSema)). +* Updated to mask account key in logs in azureBlobStorage. [#65273](https://github.com/ClickHouse/ClickHouse/pull/65273) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Partition pruning for `IN` predicates when filter expression is a part of `PARTITION BY` expression. [#65335](https://github.com/ClickHouse/ClickHouse/pull/65335) ([Eduard Karacharov](https://github.com/korowa)). +* Add system tables with main information about all detached tables. [#65400](https://github.com/ClickHouse/ClickHouse/pull/65400) ([Konstantin Morozov](https://github.com/k-morozov)). +* Add support for `cluster_for_parallel_replicas` when using custom key parallel replicas. It allows you to use parallel replicas with custom key with MergeTree tables. [#65453](https://github.com/ClickHouse/ClickHouse/pull/65453) ([Antonio Andelic](https://github.com/antonio2368)). +* Support query plan LIMIT optimization. Support LIMIT pushdown for PostgreSQL storage and table function. [#65454](https://github.com/ClickHouse/ClickHouse/pull/65454) ([Maksim Kita](https://github.com/kitaisreal)). +* Arraymin/max can be applicable to all data types that are comparable. [#65455](https://github.com/ClickHouse/ClickHouse/pull/65455) ([pn](https://github.com/chloro-pn)). +* Improved memory accounting for cgroups v2 to exclude the amount occupied by the page cache. [#65470](https://github.com/ClickHouse/ClickHouse/pull/65470) ([Nikita Taranov](https://github.com/nickitat)). +* Do not create format settings for each rows when serializing chunks to insert to EmbeddedRocksDB table. [#65474](https://github.com/ClickHouse/ClickHouse/pull/65474) ([Duc Canh Le](https://github.com/canhld94)). +* Fixed out-of-range exception in parsing Dwarf5 on s390x. [#65501](https://github.com/ClickHouse/ClickHouse/pull/65501) ([Harry Lee](https://github.com/HarryLeeIBM)). +* Reduce `clickhouse-local` prompt to just `:)`. `getFQDNOrHostName()` takes too long on macOS, and we don't want a hostname in the prompt for `clickhouse-local` anyway. 
[#65510](https://github.com/ClickHouse/ClickHouse/pull/65510) ([Konstantin Bogdanov](https://github.com/thevar1able)). +* Avoid printing a message from jemalloc about per-CPU arenas on low-end virtual machines. [#65532](https://github.com/ClickHouse/ClickHouse/pull/65532) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Disable filesystem cache background download by default. It will be enabled back when we fix the issue with possible "Memory limit exceeded" because memory deallocation is done outside of query context (while buffer is allocated inside of query context) if we use background download threads. Plus we need to add a separate setting to define max size to download for background workers (currently it is limited by max_file_segment_size, which might be too big). [#65534](https://github.com/ClickHouse/ClickHouse/pull/65534) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Add new option to config `` which allow to specify how often clickhouse will reload config. [#65545](https://github.com/ClickHouse/ClickHouse/pull/65545) ([alesapin](https://github.com/alesapin)). +* Implement binary encoding for ClickHouse data types and add its specification in docs. Use it in Dynamic binary serialization, allow to use it in RowBinaryWithNamesAndTypes and Native formats under settings. [#65546](https://github.com/ClickHouse/ClickHouse/pull/65546) ([Kruglov Pavel](https://github.com/Avogar)). +* Improved ZooKeeper load balancing. The current session doesn't expire until the optimal nodes become available despite `fallback_session_lifetime`. Added support for AZ-aware balancing. [#65570](https://github.com/ClickHouse/ClickHouse/pull/65570) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Server settings `compiled_expression_cache_size` and `compiled_expression_cache_elements_size` are now shown in `system.server_settings`. [#65584](https://github.com/ClickHouse/ClickHouse/pull/65584) ([Robert Schulze](https://github.com/rschu1ze)). +* When lightweight delete happens on a table with projection(s), users have choices either throw an exception (by default) or drop the projection lightweight delete would happen. [#65594](https://github.com/ClickHouse/ClickHouse/pull/65594) ([jsc0218](https://github.com/jsc0218)). +* Add support for user identification based on x509 SubjectAltName extension. [#65626](https://github.com/ClickHouse/ClickHouse/pull/65626) ([Anton Kozlov](https://github.com/tonickkozlov)). +* `clickhouse-local` will respect the `max_server_memory_usage` and `max_server_memory_usage_to_ram_ratio` from the configuration file. It will also set the max memory usage to 90% of the system memory by default, like `clickhouse-server` does. This closes [#65695](https://github.com/ClickHouse/ClickHouse/issues/65695). [#65697](https://github.com/ClickHouse/ClickHouse/pull/65697) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add a script to backup your files to ClickHouse. This is strange, but works. [#65699](https://github.com/ClickHouse/ClickHouse/pull/65699) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* PostgreSQL source support cancel. [#65722](https://github.com/ClickHouse/ClickHouse/pull/65722) ([Maksim Kita](https://github.com/kitaisreal)). +* Make allow_experimental_analyzer be controlled by the initiator for distributed queries. This ensures compatibility and correctness during operations in mixed version clusters. [#65777](https://github.com/ClickHouse/ClickHouse/pull/65777) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). 
+* Respect cgroup CPU limit in Keeper. [#65819](https://github.com/ClickHouse/ClickHouse/pull/65819) ([Antonio Andelic](https://github.com/antonio2368)). +* Allow to use `concat` function with empty arguments ``` sql :) select concat();. [#65887](https://github.com/ClickHouse/ClickHouse/pull/65887) ([李扬](https://github.com/taiyang-li)). +* Allow controlling named collections in clickhouse-local. [#65973](https://github.com/ClickHouse/ClickHouse/pull/65973) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Improve Azure profile events. [#65999](https://github.com/ClickHouse/ClickHouse/pull/65999) ([alesapin](https://github.com/alesapin)). +* `Query was cancelled` might have been printed twice in clickhouse-client. This behaviour is fixed. [#66005](https://github.com/ClickHouse/ClickHouse/pull/66005) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Support ORC file read by writer time zone. [#66025](https://github.com/ClickHouse/ClickHouse/pull/66025) ([kevinyhzou](https://github.com/KevinyhZou)). +* Refactor JSONExtract functions, support more types including experimental Dynamic type. [#66046](https://github.com/ClickHouse/ClickHouse/pull/66046) ([Kruglov Pavel](https://github.com/Avogar)). +* DatabaseCatalog drops tables faster by using up to database_catalog_drop_table_concurrency threads. [#66065](https://github.com/ClickHouse/ClickHouse/pull/66065) ([Sema Checherinda](https://github.com/CheSema)). +* This PR changes how deduplication for MV works. Fixed a lot of cases like: - on destination table: data is split for 2 or more blocks and that blocks is considered as duplicate when that block is inserted in parallel. - on MV destination table: the equal blocks are deduplicated, that happens when MV often produces equal data as a result for different input data due to performing aggregation. - on MV destination table: the equal blocks which comes from different MV are deduplicated. [#66144](https://github.com/ClickHouse/ClickHouse/pull/66144) ([Sema Checherinda](https://github.com/CheSema)). +* Support null map subcolumn for Variant and Dynamic subcolumns. [#66178](https://github.com/ClickHouse/ClickHouse/pull/66178) ([Kruglov Pavel](https://github.com/Avogar)). +* Add settings to control connection to the PostgreSQL. * Setting `postgresql_connection_attempt_timeout` specifies the value passed to `connect_timeout` parameter of connection URL. * Setting `postgresql_connection_pool_retries` specifies the number of retries to establish a connection to the PostgreSQL end-point. [#66232](https://github.com/ClickHouse/ClickHouse/pull/66232) ([Dmitry Novik](https://github.com/novikd)). +* Reduce inaccuracy of input_wait_elapsed_us/input_wait_elapsed_us/elapsed_us. [#66239](https://github.com/ClickHouse/ClickHouse/pull/66239) ([Azat Khuzhin](https://github.com/azat)). +* Improve FilesystemCache ProfileEvents. [#66249](https://github.com/ClickHouse/ClickHouse/pull/66249) ([zhukai](https://github.com/nauu)). +* Add settings to ignore ON CLUSTER clause in queries for named collection management with replicated storage. [#66288](https://github.com/ClickHouse/ClickHouse/pull/66288) ([MikhailBurdukov](https://github.com/MikhailBurdukov)). +* Upgraded `pocketfft` dependency to the recent commit https://github.com/mreineck/pocketfft/commit/f4c1aa8aa9ce79ad39e80f2c9c41b92ead90fda3. [#66291](https://github.com/ClickHouse/ClickHouse/pull/66291) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). 
+* Upgraded `azure-sdk-for-cpp` to the recent commit https://github.com/ClickHouse/azure-sdk-for-cpp/commit/ea3e19a7be08519134c643177d56c7484dfec884. [#66292](https://github.com/ClickHouse/ClickHouse/pull/66292) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Function `generateSnowflakeID` now allows specifying a machine ID as a parameter to prevent collisions in large clusters. [#66374](https://github.com/ClickHouse/ClickHouse/pull/66374) ([ZAWA_ll](https://github.com/Zawa-ll)). +* Disable suspending on Ctrl+Z in interactive mode. This is a common trap and is not expected behavior for almost all users. I imagine only a few extreme power users could appreciate suspending terminal applications to the background, but I don't know any. [#66511](https://github.com/ClickHouse/ClickHouse/pull/66511) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add an option for validating the primary key type in dictionaries. Without this option, for simple layouts any column type will be implicitly converted to UInt64. [#66595](https://github.com/ClickHouse/ClickHouse/pull/66595) ([MikhailBurdukov](https://github.com/MikhailBurdukov)). +* Fix SHOW MERGES remaining time. [#66735](https://github.com/ClickHouse/ClickHouse/pull/66735) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### Critical Bug Fix (crash, LOGICAL_ERROR, data loss, RBAC) +* Fix unexpected size of a low-cardinality column in function calls. [#65298](https://github.com/ClickHouse/ClickHouse/pull/65298) ([Raúl Marín](https://github.com/Algunenano)). +* Check cyclic dependencies on CREATE/REPLACE/RENAME/EXCHANGE queries and throw an exception if there is a cyclic dependency. Previously, such cyclic dependencies could lead to a deadlock during server startup. Closes [#65355](https://github.com/ClickHouse/ClickHouse/issues/65355). Also fix some bugs in dependencies creation. [#65405](https://github.com/ClickHouse/ClickHouse/pull/65405) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix crash in maxIntersections. [#65689](https://github.com/ClickHouse/ClickHouse/pull/65689) ([Raúl Marín](https://github.com/Algunenano)). +* Fix the VALID UNTIL clause in the user definition resetting after a restart. Closes [#66405](https://github.com/ClickHouse/ClickHouse/issues/66405). [#66409](https://github.com/ClickHouse/ClickHouse/pull/66409) ([Nikolay Degterinsky](https://github.com/evillique)). + +#### Bug Fix (user-visible misbehavior in an official stable release) +* Fixed crash while using MaterializedMySQL with TABLE OVERRIDE that maps a MySQL NULL field into a ClickHouse non-NULL field. [#54649](https://github.com/ClickHouse/ClickHouse/pull/54649) ([Filipp Ozinov](https://github.com/bakwc)). +* Fix logical error when a PREWHERE expression reads no columns and the table has no adaptive index granularity (a very old table). Fix [#56640](https://github.com/ClickHouse/ClickHouse/issues/56640). [#59173](https://github.com/ClickHouse/ClickHouse/pull/59173) ([Alexander Gololobov](https://github.com/davenger)). +* Fix a bug with the cancellation buffer when cancelling a query. [#64478](https://github.com/ClickHouse/ClickHouse/pull/64478) ([Sema Checherinda](https://github.com/CheSema)). +* Fix filling parts columns from metadata (when columns.txt does not exist). [#64757](https://github.com/ClickHouse/ClickHouse/pull/64757) ([Azat Khuzhin](https://github.com/azat)). +* Fix AST formatting of `create table b empty as a`. 
[#64951](https://github.com/ClickHouse/ClickHouse/pull/64951) ([Michael Kolupaev](https://github.com/al13n321)). +* Fix crash for `ALTER TABLE ... ON CLUSTER ... MODIFY SQL SECURITY`. [#64957](https://github.com/ClickHouse/ClickHouse/pull/64957) ([pufit](https://github.com/pufit)). +* Fix crash on destroying AccessControl: add explicit shutdown. [#64993](https://github.com/ClickHouse/ClickHouse/pull/64993) ([Vitaly Baranov](https://github.com/vitlibar)). +* Eliminate injective functions in the arguments of `uniq*` functions recursively. This used to work correctly but was broken in the new analyzer. [#65140](https://github.com/ClickHouse/ClickHouse/pull/65140) ([Duc Canh Le](https://github.com/canhld94)). +* Fix unexpected projection name for queries with CTE. [#65267](https://github.com/ClickHouse/ClickHouse/pull/65267) ([wudidapaopao](https://github.com/wudidapaopao)). +* Require the `dictGet` privilege when accessing dictionaries via direct query or the `Dictionary` table engine. [#65359](https://github.com/ClickHouse/ClickHouse/pull/65359) ([Joe Lynch](https://github.com/joelynch)). +* Fix user-specific S3 auth with incremental backups. [#65481](https://github.com/ClickHouse/ClickHouse/pull/65481) ([Antonio Andelic](https://github.com/antonio2368)). +* Disable the `non-intersecting-parts` optimization for queries with `FINAL` when the `read-in-order` optimization is enabled. This could lead to an incorrect query result. As a workaround, disable `do_not_merge_across_partitions_select_final` and `split_parts_ranges_into_intersecting_and_non_intersecting_final` before this fix is merged. [#65505](https://github.com/ClickHouse/ClickHouse/pull/65505) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix getting the exception `Index out of bound for blob metadata` when all files from a list batch were filtered out. [#65523](https://github.com/ClickHouse/ClickHouse/pull/65523) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix NOT_FOUND_COLUMN_IN_BLOCK for deduplicate merge of projection. [#65573](https://github.com/ClickHouse/ClickHouse/pull/65573) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Fixed a bug in MergeJoin: a column in sparse serialization might be treated as a column of its nested type even though the required conversion wasn't performed. [#65632](https://github.com/ClickHouse/ClickHouse/pull/65632) ([Nikita Taranov](https://github.com/nickitat)). +* Fixed a bug where compatibility level '23.4' was not properly applied. [#65737](https://github.com/ClickHouse/ClickHouse/pull/65737) ([cw5121](https://github.com/cw5121)). +* Fix ODBC tables with nullable fields. [#65738](https://github.com/ClickHouse/ClickHouse/pull/65738) ([Rodolphe Dugé de Bernonville](https://github.com/RodolpheDuge)). +* Fix data race in `TCPHandler`, which could happen on fatal error. [#65744](https://github.com/ClickHouse/ClickHouse/pull/65744) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix invalid exceptions in function `parseDateTime` with `%F` and `%D` placeholders. [#65768](https://github.com/ClickHouse/ClickHouse/pull/65768) ([Antonio Andelic](https://github.com/antonio2368)). +* For queries that read from `PostgreSQL`, cancel the internal `PostgreSQL` query if the ClickHouse query is finished. Otherwise, the `ClickHouse` query cannot be cancelled until the internal `PostgreSQL` query is finished. [#65771](https://github.com/ClickHouse/ClickHouse/pull/65771) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix a bug in short-circuit logic when the old analyzer and `dictGetOrDefault` are used. 
[#65802](https://github.com/ClickHouse/ClickHouse/pull/65802) ([jsc0218](https://github.com/jsc0218)). +* Fix a bug that caused EmbeddedRocksDB with TTL to write corrupted SST files. [#65816](https://github.com/ClickHouse/ClickHouse/pull/65816) ([Duc Canh Le](https://github.com/canhld94)). +* Functions `bitTest`, `bitTestAll`, and `bitTestAny` now return an error if the specified bit index is out-of-bounds (issue [#65517](https://github.com/ClickHouse/ClickHouse/issues/65517)). [#65818](https://github.com/ClickHouse/ClickHouse/pull/65818) ([Pablo Marcos](https://github.com/pamarcos)). +* Setting `join_any_take_last_row` is supported in any query with hash join. [#65820](https://github.com/ClickHouse/ClickHouse/pull/65820) ([vdimir](https://github.com/vdimir)). +* Better handling of join conditions involving `IS NULL` checks (for example `ON (a = b AND (a IS NOT NULL) AND (b IS NOT NULL) ) OR ( (a IS NULL) AND (b IS NULL) )` is rewritten to `ON a <=> b`); fix incorrect optimization when conditions other than `IS NULL` are present. [#65835](https://github.com/ClickHouse/ClickHouse/pull/65835) ([vdimir](https://github.com/vdimir)). +* Functions `bitShiftLeft` and `bitShiftRight` return an error for out-of-bounds shift positions (issue [#65516](https://github.com/ClickHouse/ClickHouse/issues/65516)). [#65838](https://github.com/ClickHouse/ClickHouse/pull/65838) ([Pablo Marcos](https://github.com/pamarcos)). +* Fix growing memory usage in S3Queue. [#65839](https://github.com/ClickHouse/ClickHouse/pull/65839) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix tie handling in `arrayAUC` to match sklearn. [#65840](https://github.com/ClickHouse/ClickHouse/pull/65840) ([gabrielmcg44](https://github.com/gabrielmcg44)). +* Fix possible issues with MySQL server protocol TLS connections. [#65917](https://github.com/ClickHouse/ClickHouse/pull/65917) ([Azat Khuzhin](https://github.com/azat)). +* Fix possible issues with MySQL client protocol TLS connections. [#65938](https://github.com/ClickHouse/ClickHouse/pull/65938) ([Azat Khuzhin](https://github.com/azat)). +* Fix handling of `SSL_ERROR_WANT_READ`/`SSL_ERROR_WANT_WRITE` with zero timeout. [#65941](https://github.com/ClickHouse/ClickHouse/pull/65941) ([Azat Khuzhin](https://github.com/azat)). +* Add the missing settings `input_format_csv_skip_first_lines/input_format_tsv_skip_first_lines/input_format_csv_try_infer_numbers_from_strings/input_format_csv_try_infer_strings_from_quoted_tuples` to the schema inference cache because they can change the resulting schema. This prevents incorrect schema inference results when these settings are changed. [#65980](https://github.com/ClickHouse/ClickHouse/pull/65980) ([Kruglov Pavel](https://github.com/Avogar)). +* The `_size` column in the s3 engine and s3 table function denotes the size of a file inside the archive, not the size of the archive itself. [#65993](https://github.com/ClickHouse/ClickHouse/pull/65993) ([Daniil Ivanik](https://github.com/divanik)). +* Fix resolving dynamic subcolumns in the analyzer; avoid reading the whole column on dynamic subcolumn reading. [#66004](https://github.com/ClickHouse/ClickHouse/pull/66004) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix config merging for `from_env` with `replace` overrides. [#66034](https://github.com/ClickHouse/ClickHouse/pull/66034) ([Azat Khuzhin](https://github.com/azat)). +* Fix a possible hanging in `GRPCServer` during shutdown. This PR fixes [#65622](https://github.com/ClickHouse/ClickHouse/issues/65622). 
[#66061](https://github.com/ClickHouse/ClickHouse/pull/66061) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix reading dynamic subcolumns from an altered Memory table. Previously, if the `max_types` parameter of a Dynamic type was changed in a Memory table via ALTER, further subcolumn reads could return a wrong result. [#66066](https://github.com/ClickHouse/ClickHouse/pull/66066) ([Kruglov Pavel](https://github.com/Avogar)). +* Fixed several cases in function `has` with non-constant `LowCardinality` arguments. [#66088](https://github.com/ClickHouse/ClickHouse/pull/66088) ([Anton Popov](https://github.com/CurtizJ)). +* Fix for `groupArrayIntersect`. It had incorrect behavior in the `merge()` function. Also, fixed behavior in `deserialise()` for numeric and general data. [#66103](https://github.com/ClickHouse/ClickHouse/pull/66103) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* Fixed a buffer overflow bug in the `unbin`/`unhex` implementation. [#66106](https://github.com/ClickHouse/ClickHouse/pull/66106) ([Nikita Taranov](https://github.com/nickitat)). +* Disable the `merge-filters` optimization introduced in [#64760](https://github.com/ClickHouse/ClickHouse/issues/64760). It may cause an exception if the optimization merges two filter expressions and does not apply short-circuit evaluation. [#66126](https://github.com/ClickHouse/ClickHouse/pull/66126) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fixed an issue where the server failed to parse Avro files with negative block sizes encoded for arrays, which is allowed by the Avro specification. [#66130](https://github.com/ClickHouse/ClickHouse/pull/66130) ([Serge Klochkov](https://github.com/slvrtrn)). +* Fixed a bug in the ZooKeeper client: a session could get stuck in an unusable state after receiving a hardware error from ZooKeeper. For example, this might happen due to "soft memory limit" in ClickHouse Keeper. [#66140](https://github.com/ClickHouse/ClickHouse/pull/66140) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix an issue in `SumIfToCountIfVisitor` with signed integers. [#66146](https://github.com/ClickHouse/ClickHouse/pull/66146) ([Raúl Marín](https://github.com/Algunenano)). +* Fix a rare case of missing data in the result of a distributed query; closes [#61432](https://github.com/ClickHouse/ClickHouse/issues/61432). [#66174](https://github.com/ClickHouse/ClickHouse/pull/66174) ([vdimir](https://github.com/vdimir)). +* Fix the order of parsing metadata fields in StorageDeltaLake. [#66211](https://github.com/ClickHouse/ClickHouse/pull/66211) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Don't throw `TIMEOUT_EXCEEDED` for the `none_only_active` mode of `distributed_ddl_output_mode`. [#66218](https://github.com/ClickHouse/ClickHouse/pull/66218) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix handling of the limit for `system.numbers_mt` when no index can be used. [#66231](https://github.com/ClickHouse/ClickHouse/pull/66231) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Fixed how the ClickHouse server detects the maximum number of usable CPU cores as specified by cgroups v2 if the server runs in a container such as Docker. In more detail, containers often run their process in the root cgroup, which has an empty name. In that case, ClickHouse ignored the CPU limits set by cgroups v2. [#66237](https://github.com/ClickHouse/ClickHouse/pull/66237) ([filimonov](https://github.com/filimonov)). +* Fix the `Not-ready set` error when a subquery with `IN` is used in the constraint. 
[#66261](https://github.com/ClickHouse/ClickHouse/pull/66261) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix an `indexHint` function case found by the fuzzer. [#66286](https://github.com/ClickHouse/ClickHouse/pull/66286) ([Anton Popov](https://github.com/CurtizJ)). +* Fix error reporting while copying to S3 or AzureBlobStorage. [#66295](https://github.com/ClickHouse/ClickHouse/pull/66295) ([Vitaly Baranov](https://github.com/vitlibar)). +* Prevent the watchdog from keeping descriptors of unlinked (rotated) log files. [#66334](https://github.com/ClickHouse/ClickHouse/pull/66334) ([Aleksei Filatov](https://github.com/aalexfvk)). +* Fix the bug where `LogicalExpressionOptimizerPass` lost the logical type of a constant. Closes [#64487](https://github.com/ClickHouse/ClickHouse/issues/64487). [#66344](https://github.com/ClickHouse/ClickHouse/pull/66344) ([pn](https://github.com/chloro-pn)). +* Fix `Column identifier is already registered` error with `group_by_use_nulls=true` and the new analyzer. [#66400](https://github.com/ClickHouse/ClickHouse/pull/66400) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix a possible incorrect result for queries joining and filtering a table with an external engine (like PostgreSQL), caused by overly aggressive filter pushdown. From now on, conditions from the WHERE section won't be sent to the external database in the case of an outer join with an external table. [#66402](https://github.com/ClickHouse/ClickHouse/pull/66402) ([vdimir](https://github.com/vdimir)). +* Added missing column materialization for cross join. [#66413](https://github.com/ClickHouse/ClickHouse/pull/66413) ([lgbo](https://github.com/lgbo-ustc)). +* Fix `Cannot find column` error for queries with a constant expression in the `GROUP BY` key and the new analyzer enabled. [#66433](https://github.com/ClickHouse/ClickHouse/pull/66433) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Avoid a possible logical error during import from the Npy format in case of a bad array nesting level; fix testing of other kinds of errors. [#66461](https://github.com/ClickHouse/ClickHouse/pull/66461) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* Fix wrong count() result when there is a non-deterministic function in the predicate. [#66510](https://github.com/ClickHouse/ClickHouse/pull/66510) ([Duc Canh Le](https://github.com/canhld94)). +* Correctly track memory for `Allocator::realloc`. [#66548](https://github.com/ClickHouse/ClickHouse/pull/66548) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix reading of uninitialized memory when hashing empty tuples. This closes [#66559](https://github.com/ClickHouse/ClickHouse/issues/66559). [#66562](https://github.com/ClickHouse/ClickHouse/pull/66562) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix an invalid result for queries with `WINDOW`. This could happen when `PARTITION` columns have sparse serialization and window functions are executed in parallel. [#66579](https://github.com/ClickHouse/ClickHouse/pull/66579) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix removing named collections in local storage. [#66599](https://github.com/ClickHouse/ClickHouse/pull/66599) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Fix logical error in `PrometheusRequestHandler`. [#66621](https://github.com/ClickHouse/ClickHouse/pull/66621) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix `column_length` not being updated in `ColumnTuple::insertManyFrom`. [#66626](https://github.com/ClickHouse/ClickHouse/pull/66626) ([lgbo](https://github.com/lgbo-ustc)).
+* Fix `Unknown identifier` and `Column is not under aggregate function` errors for queries with the expression `(column IS NULL)`. The bug was triggered by [#65088](https://github.com/ClickHouse/ClickHouse/issues/65088), only with the analyzer disabled. [#66654](https://github.com/ClickHouse/ClickHouse/pull/66654) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix `Method getResultType is not supported for QUERY query node` error when a scalar subquery was used as the first argument of `IN` (with the new analyzer). [#66655](https://github.com/ClickHouse/ClickHouse/pull/66655) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix a possible PARAMETER_OUT_OF_BOUND error when reading a Variant subcolumn. [#66659](https://github.com/ClickHouse/ClickHouse/pull/66659) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix a rare case of a stuck merge after dropping a column. [#66707](https://github.com/ClickHouse/ClickHouse/pull/66707) ([Raúl Marín](https://github.com/Algunenano)). +* Fix the `isUniqTypes` assertion when doing INSERT SELECT from remote sources. [#66722](https://github.com/ClickHouse/ClickHouse/pull/66722) ([Sema Checherinda](https://github.com/CheSema)). +* Backported in [#67026](https://github.com/ClickHouse/ClickHouse/issues/67026): In rare cases ClickHouse could consider parts as broken because of some unexpected projections on disk. Now it's fixed. [#66898](https://github.com/ClickHouse/ClickHouse/pull/66898) ([alesapin](https://github.com/alesapin)). +* Backported in [#67443](https://github.com/ClickHouse/ClickHouse/issues/67443): Forbid `CREATE ... AS SELECT` even when `database_replicated_allow_heavy_create` is set. It was unconditionally forbidden in 23.12 and accidentally allowed under the setting in unreleased 24.7. [#66980](https://github.com/ClickHouse/ClickHouse/pull/66980) ([vdimir](https://github.com/vdimir)). +* Backported in [#67201](https://github.com/ClickHouse/ClickHouse/issues/67201): TRUNCATE DATABASE used to stop replication as if it were a DROP DATABASE query; this is now fixed. [#67129](https://github.com/ClickHouse/ClickHouse/pull/67129) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#67383](https://github.com/ClickHouse/ClickHouse/issues/67383): Fix the error `Cannot convert column because it is non constant in source stream but must be constant in result.` for a query that reads from the `Merge` table over the `Distributed` table with one shard. [#67146](https://github.com/ClickHouse/ClickHouse/pull/67146) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#67246](https://github.com/ClickHouse/ClickHouse/issues/67246): This closes [#67156](https://github.com/ClickHouse/ClickHouse/issues/67156). This closes [#66447](https://github.com/ClickHouse/ClickHouse/issues/66447). The bug was introduced in https://github.com/ClickHouse/ClickHouse/pull/62907. [#67178](https://github.com/ClickHouse/ClickHouse/pull/67178) ([Maksim Kita](https://github.com/kitaisreal)). + +#### Build/Testing/Packaging Improvement +* Instantiate template methods ahead of time in different .cpp files to avoid too-large translation units during compilation. [#64818](https://github.com/ClickHouse/ClickHouse/pull/64818) ([lgbo](https://github.com/lgbo-ustc)). +* Stateless tests: Improve test speed and decrease the number of parallel jobs. [#65186](https://github.com/ClickHouse/ClickHouse/pull/65186) ([Nikita Fomichev](https://github.com/fm4v)). +* Add tests for `base64URLEncode` and `base64URLDecode`. Add analyzer tests. 
[#65979](https://github.com/ClickHouse/ClickHouse/pull/65979) ([Nikita Fomichev](https://github.com/fm4v)). +* Fix a problem where GitHub terminates instances by timeout, so artifacts are not collected and the full test report is not generated. [#66036](https://github.com/ClickHouse/ClickHouse/pull/66036) ([Nikita Fomichev](https://github.com/fm4v)). +* Fix test [test_grpc_protocol/test.py::test_progress](https://s3.amazonaws.com/clickhouse-test-reports/57695/188f8a3df74caf830ad1ced3c4cf6dfb0aa90093/integration_tests__asan__old_analyzer__[4_6].html). [#66063](https://github.com/ClickHouse/ClickHouse/pull/66063) ([Vitaly Baranov](https://github.com/vitlibar)). +* Stateless tests: Improve test speed and decrease the number of parallel jobs. [#66305](https://github.com/ClickHouse/ClickHouse/pull/66305) ([Nikita Fomichev](https://github.com/fm4v)). +* Stateless tests: Improve test speed and decrease the number of parallel jobs 3. [#66363](https://github.com/ClickHouse/ClickHouse/pull/66363) ([Nikita Fomichev](https://github.com/fm4v)). +* Tests: fix tests hanging in cases when gdb catches an error. [#66411](https://github.com/ClickHouse/ClickHouse/pull/66411) ([Nikita Fomichev](https://github.com/fm4v)). +* Fix a build error present since [Release v24.6.1.4423-stable](https://github.com/ClickHouse/ClickHouse/releases/tag/v24.6.1.4423-stable): building on ppc64le with a dynamic OpenSSL build (`cmake -DENABLE_OPENSSL_DYNAMIC=1 -DCMAKE_TOOLCHAIN_FILE=cmake/linux/toolchain-ppc64le.cmake`) failed with `ld.lld: error: duplicate symbol: OPENSSL_cleanse`. [#66733](https://github.com/ClickHouse/ClickHouse/pull/66733) ([Yong Wang](https://github.com/kashwy)). + +#### NO CL CATEGORY + +* Backported in [#67084](https://github.com/ClickHouse/ClickHouse/issues/67084):. [#67040](https://github.com/ClickHouse/ClickHouse/pull/67040) ([Raúl Marín](https://github.com/Algunenano)). +* Backported in [#67452](https://github.com/ClickHouse/ClickHouse/issues/67452):. [#67392](https://github.com/ClickHouse/ClickHouse/pull/67392) ([alesapin](https://github.com/alesapin)). + +#### NO CL ENTRY + +* NO CL ENTRY: 'Revert "Revert "Small fix for 02340_parts_refcnt_mergetree""'. [#65155](https://github.com/ClickHouse/ClickHouse/pull/65155) ([Nikita Taranov](https://github.com/nickitat)). +* NO CL ENTRY: 'Revert "Use 1MB HTTP buffers to avoid frequent send syscalls"'. [#65498](https://github.com/ClickHouse/ClickHouse/pull/65498) ([Sergei Trifonov](https://github.com/serxa)). +* NO CL ENTRY: 'Revert "Resubmit http_external_tables_memory_tracking test"'. [#65500](https://github.com/ClickHouse/ClickHouse/pull/65500) ([Nikita Taranov](https://github.com/nickitat)). +* NO CL ENTRY: 'Revert "Add an assertion in ReplicatedMergeTreeQueue"'. [#65686](https://github.com/ClickHouse/ClickHouse/pull/65686) ([Raúl Marín](https://github.com/Algunenano)). +* NO CL ENTRY: 'Revert "insertion deduplication on retries for materialised views"'. [#66134](https://github.com/ClickHouse/ClickHouse/pull/66134) ([Sema Checherinda](https://github.com/CheSema)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Return and fix 01600_parts_states_metrics_long test. [#58748](https://github.com/ClickHouse/ClickHouse/pull/58748) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Add azure_cache as storage policy to tests. [#59943](https://github.com/ClickHouse/ClickHouse/pull/59943) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Minor: replaced expression with LEGACY_MAX_LEVEL. 
[#61268](https://github.com/ClickHouse/ClickHouse/pull/61268) ([Vasily Nemkov](https://github.com/Enmk)). +* Make write to temporary data in cache do all checks and assertions as during write to ordinary cache. [#63348](https://github.com/ClickHouse/ClickHouse/pull/63348) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Refactoring near azure blob storage. [#63636](https://github.com/ClickHouse/ClickHouse/pull/63636) ([Anton Popov](https://github.com/CurtizJ)). +* Everything should work with Analyzer. [#63643](https://github.com/ClickHouse/ClickHouse/pull/63643) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* add some log for non using detached tables. [#64992](https://github.com/ClickHouse/ClickHouse/pull/64992) ([Konstantin Morozov](https://github.com/k-morozov)). +* Remove dag flags. [#65234](https://github.com/ClickHouse/ClickHouse/pull/65234) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix flaky autocompletion test. [#65246](https://github.com/ClickHouse/ClickHouse/pull/65246) ([Konstantin Bogdanov](https://github.com/thevar1able)). +* Disable userspace page cache by default. [#65305](https://github.com/ClickHouse/ClickHouse/pull/65305) ([Michael Kolupaev](https://github.com/al13n321)). +* Update version_date.tsv and changelogs after v24.4.3.25-stable. [#65308](https://github.com/ClickHouse/ClickHouse/pull/65308) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Do not raise a NOT_IMPLEMENTED error when getting s3 metrics with a multiple disk configuration. [#65403](https://github.com/ClickHouse/ClickHouse/pull/65403) ([Elena Torró](https://github.com/elenatorro)). +* Dodging reading from wrong table with parallel replicas. [#65417](https://github.com/ClickHouse/ClickHouse/pull/65417) ([Nikita Taranov](https://github.com/nickitat)). +* Fix: return error if can't connect to any replicas chosen for query execution. [#65467](https://github.com/ClickHouse/ClickHouse/pull/65467) ([Igor Nikonov](https://github.com/devcrafter)). +* Fix `AzureObjectStorage::exists` method. [#65471](https://github.com/ClickHouse/ClickHouse/pull/65471) ([Anton Popov](https://github.com/CurtizJ)). +* Update version after release. [#65483](https://github.com/ClickHouse/ClickHouse/pull/65483) ([Raúl Marín](https://github.com/Algunenano)). +* Generate 24.6 changelog. [#65485](https://github.com/ClickHouse/ClickHouse/pull/65485) ([Raúl Marín](https://github.com/Algunenano)). +* Fix of `PlanSquashingTransform`: pipeline stuck. [#65487](https://github.com/ClickHouse/ClickHouse/pull/65487) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* Fix bad test `02922_deduplication_with_zero_copy`. [#65492](https://github.com/ClickHouse/ClickHouse/pull/65492) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Disable clang-format in special areas. [#65495](https://github.com/ClickHouse/ClickHouse/pull/65495) ([Nikita Taranov](https://github.com/nickitat)). +* Fix `test_keeper_snapshots`. [#65497](https://github.com/ClickHouse/ClickHouse/pull/65497) ([Antonio Andelic](https://github.com/antonio2368)). +* Update to libunwind 8.1.7. [#65509](https://github.com/ClickHouse/ClickHouse/pull/65509) ([Michael Kolupaev](https://github.com/al13n321)). +* Setting `uniform_snowflake_conversion_functions` (not in any release yet) was replaced by setting `allow_deprecated_snowflake_conversion_functions`. The latter controls if the legacy snowflake conversion functions are available (by default, they are not). 
[#65522](https://github.com/ClickHouse/ClickHouse/pull/65522) ([Robert Schulze](https://github.com/rschu1ze)). +* Try CI without RerunCheck, jobs can be easily rerun manually though extra amount of work in CI will follow on workflow restart. [#65524](https://github.com/ClickHouse/ClickHouse/pull/65524) ([Max K.](https://github.com/maxknv)). +* Bump re2 to latest HEAD. [#65526](https://github.com/ClickHouse/ClickHouse/pull/65526) ([Robert Schulze](https://github.com/rschu1ze)). +* OpenSSL: Replace temporary fix for unsynchronized access by official fix. [#65529](https://github.com/ClickHouse/ClickHouse/pull/65529) ([Robert Schulze](https://github.com/rschu1ze)). +* Update README.md. [#65531](https://github.com/ClickHouse/ClickHouse/pull/65531) ([Tyler Hannan](https://github.com/tylerhannan)). +* CI: some time there are timeouts on DROP TABLES for random tests. [#65535](https://github.com/ClickHouse/ClickHouse/pull/65535) ([Sema Checherinda](https://github.com/CheSema)). +* Synchronize `MARK_CACHE_SIZE` value in default settings and config. [#65547](https://github.com/ClickHouse/ClickHouse/pull/65547) ([Denny Crane](https://github.com/den-crane)). +* CI: Skip removed test files in stateless flaky check job. [#65553](https://github.com/ClickHouse/ClickHouse/pull/65553) ([Max K.](https://github.com/maxknv)). +* Renames Build report jobs. [#65554](https://github.com/ClickHouse/ClickHouse/pull/65554) ([Max K.](https://github.com/maxknv)). +* Parse user from URL for dashboard.html (useful for sharing). [#65556](https://github.com/ClickHouse/ClickHouse/pull/65556) ([Azat Khuzhin](https://github.com/azat)). +* Remove tech debt. [#65561](https://github.com/ClickHouse/ClickHouse/pull/65561) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Maybe fix test `00763_lock_buffer_long.sh`. [#65562](https://github.com/ClickHouse/ClickHouse/pull/65562) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix clickhouse-keeper with not system-wide directories and provide override for local development. [#65563](https://github.com/ClickHouse/ClickHouse/pull/65563) ([Azat Khuzhin](https://github.com/azat)). +* Re-configure yamllint to allow document-start. [#65565](https://github.com/ClickHouse/ClickHouse/pull/65565) ([Azat Khuzhin](https://github.com/azat)). +* Fix flaky test `01254_dict_load_after_detach_attach.sql`. [#65571](https://github.com/ClickHouse/ClickHouse/pull/65571) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Improve flaky test to provide more diagnostics. [#65586](https://github.com/ClickHouse/ClickHouse/pull/65586) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix test_parallel_replicas_distributed_skip_shards flakiness. [#65588](https://github.com/ClickHouse/ClickHouse/pull/65588) ([Igor Nikonov](https://github.com/devcrafter)). +* Fix an error in the test about metadata_type. [#65592](https://github.com/ClickHouse/ClickHouse/pull/65592) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix slow test. [#65593](https://github.com/ClickHouse/ClickHouse/pull/65593) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* fix flaky 02864_statistics_uniq. [#65599](https://github.com/ClickHouse/ClickHouse/pull/65599) ([Han Fei](https://github.com/hanfei1991)). +* Fix 03172_error_log_table_not_empty. [#65604](https://github.com/ClickHouse/ClickHouse/pull/65604) ([Pablo Marcos](https://github.com/pamarcos)). +* Enable realtime digest for Jepsen tests. 
[#65608](https://github.com/ClickHouse/ClickHouse/pull/65608) ([Antonio Andelic](https://github.com/antonio2368)). +* CI: Return Job Rerun check. [#65613](https://github.com/ClickHouse/ClickHouse/pull/65613) ([Max K.](https://github.com/maxknv)). +* Update CHANGELOG.md. [#65624](https://github.com/ClickHouse/ClickHouse/pull/65624) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Module is required for authenticating in GH (in cloud). [#65628](https://github.com/ClickHouse/ClickHouse/pull/65628) ([Max K.](https://github.com/maxknv)). +* Update IObjectStorage.h. [#65631](https://github.com/ClickHouse/ClickHouse/pull/65631) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix 02834_apache_arrow_abort flakiness with MSAN. [#65640](https://github.com/ClickHouse/ClickHouse/pull/65640) ([Michael Kolupaev](https://github.com/al13n321)). +* Fix overflow in StorageWindowView. [#65641](https://github.com/ClickHouse/ClickHouse/pull/65641) ([Michael Kolupaev](https://github.com/al13n321)). +* Fix inconsistent AST formatting when a keyword is used as type name. [#65648](https://github.com/ClickHouse/ClickHouse/pull/65648) ([Michael Kolupaev](https://github.com/al13n321)). +* CI: Single point of setting mergeable check status. [#65658](https://github.com/ClickHouse/ClickHouse/pull/65658) ([Max K.](https://github.com/maxknv)). +* Miscellaneous and insignificant changes around Client/ClientBase. [#65669](https://github.com/ClickHouse/ClickHouse/pull/65669) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Add Replicated database names to ZooKeeper for introspection. [#65675](https://github.com/ClickHouse/ClickHouse/pull/65675) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Introduce type contract checks in `IColumn`. [#65687](https://github.com/ClickHouse/ClickHouse/pull/65687) ([Nikita Taranov](https://github.com/nickitat)). +* Print slightly more information in 02982_aggregation_states_destruction. [#65688](https://github.com/ClickHouse/ClickHouse/pull/65688) ([Michael Kolupaev](https://github.com/al13n321)). +* Disable stacktrace collection in GWPAsan by default. [#65701](https://github.com/ClickHouse/ClickHouse/pull/65701) ([Antonio Andelic](https://github.com/antonio2368)). +* Build jemalloc with profiler. [#65702](https://github.com/ClickHouse/ClickHouse/pull/65702) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix duplicate symbol linkage error. [#65705](https://github.com/ClickHouse/ClickHouse/pull/65705) ([Nikita Taranov](https://github.com/nickitat)). +* Fix server restarts in performance tests. [#65717](https://github.com/ClickHouse/ClickHouse/pull/65717) ([Antonio Andelic](https://github.com/antonio2368)). +* Update 03002_part_log_rmt_fetch_mutate_error.sql. [#65720](https://github.com/ClickHouse/ClickHouse/pull/65720) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix bug with session closing in Keeper. [#65735](https://github.com/ClickHouse/ClickHouse/pull/65735) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix flaky `02265_column_ttl`. Closes [#65719](https://github.com/ClickHouse/ClickHouse/issues/65719). [#65742](https://github.com/ClickHouse/ClickHouse/pull/65742) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* See [#65745](https://github.com/ClickHouse/ClickHouse/issues/65745). It doesn't solve the issue, but helps a bit. [#65746](https://github.com/ClickHouse/ClickHouse/pull/65746) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Update CHANGELOG.md. 
[#65752](https://github.com/ClickHouse/ClickHouse/pull/65752) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* document declarative ssh-keys authentication. [#65756](https://github.com/ClickHouse/ClickHouse/pull/65756) ([Tobias Florek](https://github.com/ibotty)). +* `base64En/Decode64Url` --> `base64En/Decode64URL`. [#65760](https://github.com/ClickHouse/ClickHouse/pull/65760) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix for issue [#65757](https://github.com/ClickHouse/ClickHouse/issues/65757). [#65763](https://github.com/ClickHouse/ClickHouse/pull/65763) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* Fix flaky `test_replicated_database::test_alter_attach`. [#65766](https://github.com/ClickHouse/ClickHouse/pull/65766) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix: progress bar for read in order queries. [#65769](https://github.com/ClickHouse/ClickHouse/pull/65769) ([Igor Nikonov](https://github.com/devcrafter)). +* CI: Fix for Builds report job in backports and releases. [#65774](https://github.com/ClickHouse/ClickHouse/pull/65774) ([Max K.](https://github.com/maxknv)). +* CI: New create release workflow. [#65775](https://github.com/ClickHouse/ClickHouse/pull/65775) ([Max K.](https://github.com/maxknv)). +* fixed misspelled word. [#65778](https://github.com/ClickHouse/ClickHouse/pull/65778) ([Linh Giang](https://github.com/linhgiang24)). +* Refactor statistics interface. [#65792](https://github.com/ClickHouse/ClickHouse/pull/65792) ([Robert Schulze](https://github.com/rschu1ze)). +* Try to make `test_ldap_external_user_directory` less flaky. [#65794](https://github.com/ClickHouse/ClickHouse/pull/65794) ([Andrey Zvonov](https://github.com/zvonand)). +* AMI image with gh and jwt. [#65795](https://github.com/ClickHouse/ClickHouse/pull/65795) ([Max K.](https://github.com/maxknv)). +* Forbid join algorithm randomisation for 03094_one_thousand_joins. [#65798](https://github.com/ClickHouse/ClickHouse/pull/65798) ([Nikita Taranov](https://github.com/nickitat)). +* Fix 02931_rewrite_sum_column_and_constant flakiness. [#65800](https://github.com/ClickHouse/ClickHouse/pull/65800) ([Michael Kolupaev](https://github.com/al13n321)). +* Update StorageMaterializedView.cpp. [#65801](https://github.com/ClickHouse/ClickHouse/pull/65801) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fix slow `getFQDNOrHostNameImpl` on macOS. [#65803](https://github.com/ClickHouse/ClickHouse/pull/65803) ([Konstantin Bogdanov](https://github.com/thevar1able)). +* No jemalloc profiler for non-Linux. [#65834](https://github.com/ClickHouse/ClickHouse/pull/65834) ([Antonio Andelic](https://github.com/antonio2368)). +* Add missing workload identity changes. [#65848](https://github.com/ClickHouse/ClickHouse/pull/65848) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Fix rocksdb. [#65858](https://github.com/ClickHouse/ClickHouse/pull/65858) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Update the list of easy tasks. [#65865](https://github.com/ClickHouse/ClickHouse/pull/65865) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Update CHANGELOG.md. [#65866](https://github.com/ClickHouse/ClickHouse/pull/65866) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* This closes [#43003](https://github.com/ClickHouse/ClickHouse/issues/43003). [#65870](https://github.com/ClickHouse/ClickHouse/pull/65870) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Uninteresting changes. 
[#65871](https://github.com/ClickHouse/ClickHouse/pull/65871) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Max sessions for user tests improvements. [#65888](https://github.com/ClickHouse/ClickHouse/pull/65888) ([Alexey Gerasimchuck](https://github.com/Demilivor)). +* Update version_date.tsv and changelogs after v24.6.1.4423-stable. [#65909](https://github.com/ClickHouse/ClickHouse/pull/65909) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Remove standalone Keeper build. [#65910](https://github.com/ClickHouse/ClickHouse/pull/65910) ([Antonio Andelic](https://github.com/antonio2368)). +* Add extra profiling helpers for Keeper. [#65918](https://github.com/ClickHouse/ClickHouse/pull/65918) ([Antonio Andelic](https://github.com/antonio2368)). +* PostgreSQL source cancel query comments. [#65919](https://github.com/ClickHouse/ClickHouse/pull/65919) ([Maksim Kita](https://github.com/kitaisreal)). +* Remove mysqlxx::Pool::Entry assignment operator. [#65920](https://github.com/ClickHouse/ClickHouse/pull/65920) ([Azat Khuzhin](https://github.com/azat)). +* No random settings for a test with `Object(JSON)`. [#65921](https://github.com/ClickHouse/ClickHouse/pull/65921) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Follow up to [#65046](https://github.com/ClickHouse/ClickHouse/issues/65046). [#65928](https://github.com/ClickHouse/ClickHouse/pull/65928) ([Kseniia Sumarokova](https://github.com/kssenii)). +* add restriction for storage join. [#65936](https://github.com/ClickHouse/ClickHouse/pull/65936) ([Han Fei](https://github.com/hanfei1991)). +* Update version_date.tsv and changelogs after v24.5.4.49-stable. [#65937](https://github.com/ClickHouse/ClickHouse/pull/65937) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Add table name to MergeTreeSource spans. [#65940](https://github.com/ClickHouse/ClickHouse/pull/65940) ([Nikita Taranov](https://github.com/nickitat)). +* Fix SettingsChangesHistory 24.7. [#65945](https://github.com/ClickHouse/ClickHouse/pull/65945) ([Raúl Marín](https://github.com/Algunenano)). +* Fix logical error "Expected ReadBufferFromFile, but got DB::EmptyReadBuffer". [#65949](https://github.com/ClickHouse/ClickHouse/pull/65949) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Use -Og instead of -O0 for debug builds. [#65953](https://github.com/ClickHouse/ClickHouse/pull/65953) ([Michael Kolupaev](https://github.com/al13n321)). +* Fix data race for Keeper snapshot queue. [#65970](https://github.com/ClickHouse/ClickHouse/pull/65970) ([Antonio Andelic](https://github.com/antonio2368)). +* Minor changes in CHANGELOG. [#65971](https://github.com/ClickHouse/ClickHouse/pull/65971) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove unnatural punctuation from Parquet. [#65972](https://github.com/ClickHouse/ClickHouse/pull/65972) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Try fix "Check timeout expired" without any server logs in report in stateless tests. [#65977](https://github.com/ClickHouse/ClickHouse/pull/65977) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix support of non-const scale arguments in rounding functions. [#65983](https://github.com/ClickHouse/ClickHouse/pull/65983) ([Mikhail Gorshkov](https://github.com/mgorshkov)). +* More aesthetic error messages. [#65985](https://github.com/ClickHouse/ClickHouse/pull/65985) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix race in s3queue. 
[#65986](https://github.com/ClickHouse/ClickHouse/pull/65986) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Now it's possible to specify `s3-storage`, `azure-object-storage` and in general `object-storage`. [#65988](https://github.com/ClickHouse/ClickHouse/pull/65988) ([alesapin](https://github.com/alesapin)). +* Fix flaky test_storage_s3_queue tests. [#66009](https://github.com/ClickHouse/ClickHouse/pull/66009) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Relax the check in 02982_aggregation_states_destruction. [#66011](https://github.com/ClickHouse/ClickHouse/pull/66011) ([Nikita Taranov](https://github.com/nickitat)). +* Fix `01158_zookeeper_log_long`. [#66012](https://github.com/ClickHouse/ClickHouse/pull/66012) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Remove scary jemalloc log. [#66028](https://github.com/ClickHouse/ClickHouse/pull/66028) ([Antonio Andelic](https://github.com/antonio2368)). +* Move experimental settings to the experimental block. [#66030](https://github.com/ClickHouse/ClickHouse/pull/66030) ([Raúl Marín](https://github.com/Algunenano)). +* Fix lock-order-inversion in DatabaseCatalog. [#66038](https://github.com/ClickHouse/ClickHouse/pull/66038) ([Nikolay Degterinsky](https://github.com/evillique)). +* Try disabling jemalloc background threads. [#66041](https://github.com/ClickHouse/ClickHouse/pull/66041) ([Antonio Andelic](https://github.com/antonio2368)). +* Try to avoid conflicts in `SettingsChangesHistory.cpp`. [#66042](https://github.com/ClickHouse/ClickHouse/pull/66042) ([Anton Popov](https://github.com/CurtizJ)). +* Add profile events for regex cache. [#66050](https://github.com/ClickHouse/ClickHouse/pull/66050) ([Antonio Andelic](https://github.com/antonio2368)). +* Bump vectorscan to 5.4.10.1. [#66056](https://github.com/ClickHouse/ClickHouse/pull/66056) ([Robert Schulze](https://github.com/rschu1ze)). +* Remove obsolete comment. [#66059](https://github.com/ClickHouse/ClickHouse/pull/66059) ([Robert Schulze](https://github.com/rschu1ze)). +* Maybe fix tsan assert in `test_mysql_killed_while_insert_8_0`. [#66064](https://github.com/ClickHouse/ClickHouse/pull/66064) ([Robert Schulze](https://github.com/rschu1ze)). +* Move some of `HTTPHandler` stuff to separate source files in order to reuse it in `PrometheusRequestHandler`. This PR is required for https://github.com/ClickHouse/ClickHouse/pull/64183. [#66067](https://github.com/ClickHouse/ClickHouse/pull/66067) ([Vitaly Baranov](https://github.com/vitlibar)). +* Bump rocksdb to v6.23.3. [#66068](https://github.com/ClickHouse/ClickHouse/pull/66068) ([Robert Schulze](https://github.com/rschu1ze)). +* Add protobufs for `Prometheus` `remote-write` / `remote-read` protocols to our repository. Fix cmake script for compiling protobufs. [#66069](https://github.com/ClickHouse/ClickHouse/pull/66069) ([Vitaly Baranov](https://github.com/vitlibar)). +* Use pinned versions of all python packages in CI docker images. Also makes clang-18.1.8 work with sanitizers and surprisingly fixes [#66049](https://github.com/ClickHouse/ClickHouse/issues/66049). [#66070](https://github.com/ClickHouse/ClickHouse/pull/66070) ([alesapin](https://github.com/alesapin)). +* Clean-up custom LLVM 15 patches. [#66072](https://github.com/ClickHouse/ClickHouse/pull/66072) ([Robert Schulze](https://github.com/rschu1ze)). +* Minor JWT client fixes. [#66073](https://github.com/ClickHouse/ClickHouse/pull/66073) ([Konstantin Bogdanov](https://github.com/thevar1able)). +* Bump vectorscan to 5.4.11. 
[#66082](https://github.com/ClickHouse/ClickHouse/pull/66082) ([Robert Schulze](https://github.com/rschu1ze)). +* Print stacktrace in case of abort after logical error. [#66091](https://github.com/ClickHouse/ClickHouse/pull/66091) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* fix flaky 03172_error_log_table_not_empty. [#66093](https://github.com/ClickHouse/ClickHouse/pull/66093) ([Sema Checherinda](https://github.com/CheSema)). +* Bump s2geometry to latest master. [#66094](https://github.com/ClickHouse/ClickHouse/pull/66094) ([Robert Schulze](https://github.com/rschu1ze)). +* update keeper bench example config file. [#66095](https://github.com/ClickHouse/ClickHouse/pull/66095) ([Han Fei](https://github.com/hanfei1991)). +* Avoid using source directory for generated files. [#66097](https://github.com/ClickHouse/ClickHouse/pull/66097) ([Azat Khuzhin](https://github.com/azat)). +* More precise warning message about sanitizers. [#66098](https://github.com/ClickHouse/ClickHouse/pull/66098) ([Anton Popov](https://github.com/CurtizJ)). +* Slightly better calculation of primary index. [#66099](https://github.com/ClickHouse/ClickHouse/pull/66099) ([Anton Popov](https://github.com/CurtizJ)). +* Bump Azure to 1.12. [#66100](https://github.com/ClickHouse/ClickHouse/pull/66100) ([Robert Schulze](https://github.com/rschu1ze)). +* Add a test for [#58998](https://github.com/ClickHouse/ClickHouse/issues/58998). [#66101](https://github.com/ClickHouse/ClickHouse/pull/66101) ([Anton Popov](https://github.com/CurtizJ)). +* CI: Fix sync pr merge. [#66105](https://github.com/ClickHouse/ClickHouse/pull/66105) ([Max K.](https://github.com/maxknv)). +* Remove flaky case from 02956_rocksdb_bulk_sink. [#66107](https://github.com/ClickHouse/ClickHouse/pull/66107) ([vdimir](https://github.com/vdimir)). +* Fix bugfix checker. [#66120](https://github.com/ClickHouse/ClickHouse/pull/66120) ([Raúl Marín](https://github.com/Algunenano)). +* Correctly print long processing requests in Keeper. [#66124](https://github.com/ClickHouse/ClickHouse/pull/66124) ([Antonio Andelic](https://github.com/antonio2368)). +* Update version_date.tsv and changelogs after v24.6.2.17-stable. [#66127](https://github.com/ClickHouse/ClickHouse/pull/66127) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Bump s2geometry again. [#66136](https://github.com/ClickHouse/ClickHouse/pull/66136) ([Robert Schulze](https://github.com/rschu1ze)). +* Switch submodule `contrib/orc` to a proper commit in the [main](https://github.com/ClickHouse/orc/tree/main) branch. Previously a commit from a removed branch was used ([see](https://github.com/ClickHouse/orc/pull/13)). [#66137](https://github.com/ClickHouse/ClickHouse/pull/66137) ([Vitaly Baranov](https://github.com/vitlibar)). +* Finalize MergedBlockOutputStream in dtor. [#66138](https://github.com/ClickHouse/ClickHouse/pull/66138) ([Nikita Taranov](https://github.com/nickitat)). +* Proper destruction order of AsyncLoader::Pool fields. [#66145](https://github.com/ClickHouse/ClickHouse/pull/66145) ([Sergei Trifonov](https://github.com/serxa)). +* Playing minesweeper with build system. [#66147](https://github.com/ClickHouse/ClickHouse/pull/66147) ([Nikita Taranov](https://github.com/nickitat)). +* Fix clang-tidy error in BufferWithOwnMemory.h. [#66161](https://github.com/ClickHouse/ClickHouse/pull/66161) ([Nikita Taranov](https://github.com/nickitat)). +* Use peak_threads_usage instead of arrayUniq(thread_ids) in tests. 
[#66162](https://github.com/ClickHouse/ClickHouse/pull/66162) ([Azat Khuzhin](https://github.com/azat)). +* Fix crash when adding empty tuple to query cache. [#66168](https://github.com/ClickHouse/ClickHouse/pull/66168) ([Michael Kolupaev](https://github.com/al13n321)). +* tests: fix 01563_distributed_query_finish flakiness (due to system.*_log_sender). [#66171](https://github.com/ClickHouse/ClickHouse/pull/66171) ([Azat Khuzhin](https://github.com/azat)). +* Refactor `OptimizeIfWithConstantConditionVisitor` using `InDepthNodeVisitor`. [#66184](https://github.com/ClickHouse/ClickHouse/pull/66184) ([zhongyuankai](https://github.com/zhongyuankai)). +* Update README.md. [#66186](https://github.com/ClickHouse/ClickHouse/pull/66186) ([Tyler Hannan](https://github.com/tylerhannan)). +* Fix 01246_buffer_flush flakiness. [#66188](https://github.com/ClickHouse/ClickHouse/pull/66188) ([Azat Khuzhin](https://github.com/azat)). +* Avoid using harmful function `rand()` in grpc. [#66191](https://github.com/ClickHouse/ClickHouse/pull/66191) ([Vitaly Baranov](https://github.com/vitlibar)). +* Bump RocksDB. [#66216](https://github.com/ClickHouse/ClickHouse/pull/66216) ([Robert Schulze](https://github.com/rschu1ze)). +* Update README.md. [#66217](https://github.com/ClickHouse/ClickHouse/pull/66217) ([Tyler Hannan](https://github.com/tylerhannan)). +* Fixes peak_threads_usage metric when materialised views are involved. [#66230](https://github.com/ClickHouse/ClickHouse/pull/66230) ([Sema Checherinda](https://github.com/CheSema)). +* Remove test as requested in https://github.com/ClickHouse/ClickHouse/pull/65277#issuecomment-2211361465. [#66233](https://github.com/ClickHouse/ClickHouse/pull/66233) ([Arthur Passos](https://github.com/arthurpassos)). +* Fix test `00504_mergetree_arrays_rw.sql`. [#66248](https://github.com/ClickHouse/ClickHouse/pull/66248) ([Anton Popov](https://github.com/CurtizJ)). +* CI: Do not finalize CI running status unless all success. [#66276](https://github.com/ClickHouse/ClickHouse/pull/66276) ([Max K.](https://github.com/maxknv)). +* Collect core dumps in more tests. [#66281](https://github.com/ClickHouse/ClickHouse/pull/66281) ([Antonio Andelic](https://github.com/antonio2368)). +* Add a stateless test for gRPC protocol. [#66284](https://github.com/ClickHouse/ClickHouse/pull/66284) ([Vitaly Baranov](https://github.com/vitlibar)). +* Log message: Failed to connect to replica ... [#66289](https://github.com/ClickHouse/ClickHouse/pull/66289) ([Igor Nikonov](https://github.com/devcrafter)). +* Update run.sh. [#66290](https://github.com/ClickHouse/ClickHouse/pull/66290) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Some changes in the codebase as a preparation for LLVM 18. [#66293](https://github.com/ClickHouse/ClickHouse/pull/66293) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* There's some problem with iptables in parallel tests. [#66304](https://github.com/ClickHouse/ClickHouse/pull/66304) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Disable broken cases from 02911_join_on_nullsafe_optimization. [#66310](https://github.com/ClickHouse/ClickHouse/pull/66310) ([vdimir](https://github.com/vdimir)). +* Build failure if ENABLE_AWS_S3 is OFF fixed. [#66335](https://github.com/ClickHouse/ClickHouse/pull/66335) ([Ilya Golshtein](https://github.com/ilejn)). +* Enable checks in assert_cast under sanitizers. [#66336](https://github.com/ClickHouse/ClickHouse/pull/66336) ([Nikita Taranov](https://github.com/nickitat)). +* Create release workflow. 
[#66339](https://github.com/ClickHouse/ClickHouse/pull/66339) ([Max K.](https://github.com/maxknv)). +* Fix invalid XML. [#66342](https://github.com/ClickHouse/ClickHouse/pull/66342) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Fix possible deadlock for jemalloc with enabled profiler. [#66346](https://github.com/ClickHouse/ClickHouse/pull/66346) ([Azat Khuzhin](https://github.com/azat)). +* Fix test_parallel_replicas_custom_key. [#66349](https://github.com/ClickHouse/ClickHouse/pull/66349) ([Antonio Andelic](https://github.com/antonio2368)). +* Collect logs from `minio` in stateless and statefull tests. [#66353](https://github.com/ClickHouse/ClickHouse/pull/66353) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Fix "Sending a batch of X files to Y (0.00 rows, 0.00 B bytes)." in case of batch restoring. [#66375](https://github.com/ClickHouse/ClickHouse/pull/66375) ([Azat Khuzhin](https://github.com/azat)). +* Fix 03030_system_flush_distributed_settings flakiness. [#66376](https://github.com/ClickHouse/ClickHouse/pull/66376) ([Azat Khuzhin](https://github.com/azat)). +* PR cleanup: remove redundant code. [#66380](https://github.com/ClickHouse/ClickHouse/pull/66380) ([Igor Nikonov](https://github.com/devcrafter)). +* New slack bot to post messages about CI events - Post message if OOM. [#66392](https://github.com/ClickHouse/ClickHouse/pull/66392) ([Max K.](https://github.com/maxknv)). +* Ignore subquery for IN in DDLLoadingDependencyVisitor. [#66395](https://github.com/ClickHouse/ClickHouse/pull/66395) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Update test_storage_rabbitmq/test.py. [#66396](https://github.com/ClickHouse/ClickHouse/pull/66396) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Add query elapsed time for non-default format in play UI. [#66398](https://github.com/ClickHouse/ClickHouse/pull/66398) ([Azat Khuzhin](https://github.com/azat)). +* Untangle setting headers. [#66404](https://github.com/ClickHouse/ClickHouse/pull/66404) ([Raúl Marín](https://github.com/Algunenano)). +* Remove noisy message. [#66406](https://github.com/ClickHouse/ClickHouse/pull/66406) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* If job detected as in-progress in PR workflow run - just remove it from todo list, as it not affected by the change. [#66407](https://github.com/ClickHouse/ClickHouse/pull/66407) ([Max K.](https://github.com/maxknv)). +* CI: CIBuddy to post from master only. [#66417](https://github.com/ClickHouse/ClickHouse/pull/66417) ([Max K.](https://github.com/maxknv)). +* Add a test for [#66333](https://github.com/ClickHouse/ClickHouse/issues/66333). [#66432](https://github.com/ClickHouse/ClickHouse/pull/66432) ([max-vostrikov](https://github.com/max-vostrikov)). +* Limit number of linker jobs on arm to avoid OOM during build. [#66435](https://github.com/ClickHouse/ClickHouse/pull/66435) ([Nikita Taranov](https://github.com/nickitat)). +* [RFC] Fix jemalloc assertion due to non-monotonic CLOCK_MONOTONIC_COARSE. [#66439](https://github.com/ClickHouse/ClickHouse/pull/66439) ([Azat Khuzhin](https://github.com/azat)). +* CI: Do not block CI on few number of test failures. [#66440](https://github.com/ClickHouse/ClickHouse/pull/66440) ([Max K.](https://github.com/maxknv)). +* Stateless tests: fix flaky tests 01037_polygon_dicts*. [#66445](https://github.com/ClickHouse/ClickHouse/pull/66445) ([Nikita Fomichev](https://github.com/fm4v)). 
+* Related to https://github.com/ClickHouse/ClickHouse/pull/62067: in the failure report https://s3.amazonaws.com/clickhouse-test-reports/66410/5557dce188cabc7477bb4e874d47e3b80278ee66/stateless_tests__release_.html, queries for `alter_table` (e.g. `OPTIMIZE TABLE alter_table0 FINAL`) did not finish automatically after 250+ seconds. [#66460](https://github.com/ClickHouse/ClickHouse/pull/66460) ([Alexander Tokmakov](https://github.com/tavplubix)). +* An OOM error was not visible because the process is killed and the status is not set; this change sets ERROR status if the job was killed. [#66463](https://github.com/ClickHouse/ClickHouse/pull/66463) ([Max K.](https://github.com/maxknv)). +* Add AST fuzzers jobs for CI caching so that they can be skipped in PRs not related to build or tests. [#66468](https://github.com/ClickHouse/ClickHouse/pull/66468) ([Max K.](https://github.com/maxknv)). +* If a job with the same digest has been seen in master's CI, it should be skipped in the PR run. [#66471](https://github.com/ClickHouse/ClickHouse/pull/66471) ([Max K.](https://github.com/maxknv)). +* CI: Check job's exit status and report if killed. [#66477](https://github.com/ClickHouse/ClickHouse/pull/66477) ([Max K.](https://github.com/maxknv)). +* This closes [#37557](https://github.com/ClickHouse/ClickHouse/issues/37557). [#66482](https://github.com/ClickHouse/ClickHouse/pull/66482) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* CI: Add retry for GH set_status_comment call. [#66488](https://github.com/ClickHouse/ClickHouse/pull/66488) ([Max K.](https://github.com/maxknv)). +* OpenSSL: Minor follow-up to [#66064](https://github.com/ClickHouse/ClickHouse/issues/66064). [#66489](https://github.com/ClickHouse/ClickHouse/pull/66489) ([Robert Schulze](https://github.com/rschu1ze)). +* CI: Fix for job filtering in PRs. [#66490](https://github.com/ClickHouse/ClickHouse/pull/66490) ([Max K.](https://github.com/maxknv)). +* CI: Create release workflow updates. [#66498](https://github.com/ClickHouse/ClickHouse/pull/66498) ([Max K.](https://github.com/maxknv)). +* Add one more revision to ignore. [#66499](https://github.com/ClickHouse/ClickHouse/pull/66499) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Unit tests vomit a ton of garbage, see https://s3.amazonaws.com/clickhouse-test-reports/66457/0c82dc91f07b29ba503d7579c7d3ebecba532b73/unit_tests__tsan_/run.log - remove it. [#66501](https://github.com/ClickHouse/ClickHouse/pull/66501) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix double whitespace in EXPLAIN AST CREATE. [#66505](https://github.com/ClickHouse/ClickHouse/pull/66505) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix bad test `02530_dictionaries_update_field`. [#66507](https://github.com/ClickHouse/ClickHouse/pull/66507) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
+* Https://play.clickhouse.com/play?user=play#u0vmrunuignozwnrx3n0yxj0x3rpbwusignozwnrx25hbwusihrlc3rfbmftzswgcmvwb3j0x3vybapguk9nignozwnrcwpxsevsrsbjagvja19zdgfydf90aw1lid49ig5vdygpic0gsu5urvjwquwgmjqwiehpvvikicagieforcbwdwxsx3jlcxvlc3rfbnvtymvyid0gmaogicagqu5eihrlc3rfc3rhdhvzice9icdts0lquevejwogicagqu5eihrlc3rfc3rhdhvziexjs0ugj0yljwogicagqu5eignozwnrx3n0yxr1cyahpsanc3vjy2vzcyckicagieforcbwb3npdglvbih0zxn0x25hbwusicdhcgfjagvfyxjyb3cnksa+idakt1jervigqlkgy2hly2tfc3rhcnrfdgltzq==. [#66508](https://github.com/ClickHouse/ClickHouse/pull/66508) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix MSan report in GRPC. [#66509](https://github.com/ClickHouse/ClickHouse/pull/66509) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* CI: Fix for skipping Builds_2 in PRs' CI. [#66512](https://github.com/ClickHouse/ClickHouse/pull/66512) ([Max K.](https://github.com/maxknv)). +* CI: Do not block Tests_3 unless MAX_FAILED_TESTS exceeded. [#66513](https://github.com/ClickHouse/ClickHouse/pull/66513) ([Max K.](https://github.com/maxknv)). +* Fix `02918_parallel_replicas_custom_key_unavailable_replica`. [#66516](https://github.com/ClickHouse/ClickHouse/pull/66516) ([Antonio Andelic](https://github.com/antonio2368)). +* Stateless tests: improvements related to OOM of test runs. [#66520](https://github.com/ClickHouse/ClickHouse/pull/66520) ([Nikita Fomichev](https://github.com/fm4v)). +* Tests: rename bad log names. [#66522](https://github.com/ClickHouse/ClickHouse/pull/66522) ([Nikita Fomichev](https://github.com/fm4v)). +* Add additional log masking in CI. [#66523](https://github.com/ClickHouse/ClickHouse/pull/66523) ([Raúl Marín](https://github.com/Algunenano)). +* CI: Multiple fixes for handling killed jobs. [#66524](https://github.com/ClickHouse/ClickHouse/pull/66524) ([Max K.](https://github.com/maxknv)). +* Allow GWP Asan allocations only when initialization is finished. [#66526](https://github.com/ClickHouse/ClickHouse/pull/66526) ([Alexey Katsman](https://github.com/alexkats)). +* Update 02443_detach_attach_partition.sh. [#66529](https://github.com/ClickHouse/ClickHouse/pull/66529) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Follow up [#66435](https://github.com/ClickHouse/ClickHouse/issues/66435). [#66530](https://github.com/ClickHouse/ClickHouse/pull/66530) ([Nikita Taranov](https://github.com/nickitat)). +* fix log in keeper tcp handler. [#66531](https://github.com/ClickHouse/ClickHouse/pull/66531) ([Han Fei](https://github.com/hanfei1991)). +* CI: Report job start and finish to CI DB. [#66533](https://github.com/ClickHouse/ClickHouse/pull/66533) ([Max K.](https://github.com/maxknv)). +* Update 01396_inactive_replica_cleanup_nodes_zookeeper.sh. [#66535](https://github.com/ClickHouse/ClickHouse/pull/66535) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Add dedicated runner to libfuzzer, update docker. [#66551](https://github.com/ClickHouse/ClickHouse/pull/66551) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* fix tidy build. [#66552](https://github.com/ClickHouse/ClickHouse/pull/66552) ([Sema Checherinda](https://github.com/CheSema)). +* No-op env change. [#66553](https://github.com/ClickHouse/ClickHouse/pull/66553) ([Raúl Marín](https://github.com/Algunenano)). +* Fix typo in new_delete.cpp. [#66554](https://github.com/ClickHouse/ClickHouse/pull/66554) ([alesapin](https://github.com/alesapin)). +* Fix something in Fast Test. 
[#66558](https://github.com/ClickHouse/ClickHouse/pull/66558) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* update trusted contributors. [#66561](https://github.com/ClickHouse/ClickHouse/pull/66561) ([Xu Jia](https://github.com/XuJia0210)). +* Delete bad test `02805_distributed_queries_timeouts`. [#66563](https://github.com/ClickHouse/ClickHouse/pull/66563) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* More clarity in the test `03001_consider_lwd_when_merge`. [#66564](https://github.com/ClickHouse/ClickHouse/pull/66564) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Stateless tests: temporary disable sequential tests in parallel. [#66585](https://github.com/ClickHouse/ClickHouse/pull/66585) ([Nikita Fomichev](https://github.com/fm4v)). +* Move view targets to separate AST class `ASTViewTargets` in order to allow extending it to support more kinds of view targets. [#66590](https://github.com/ClickHouse/ClickHouse/pull/66590) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix bsdtar for @nikitamikhaylov. [#66592](https://github.com/ClickHouse/ClickHouse/pull/66592) ([alesapin](https://github.com/alesapin)). +* CI: POC for Auto Releases. [#66593](https://github.com/ClickHouse/ClickHouse/pull/66593) ([Max K.](https://github.com/maxknv)). +* Fix clang tidy after [#66402](https://github.com/ClickHouse/ClickHouse/issues/66402). [#66597](https://github.com/ClickHouse/ClickHouse/pull/66597) ([vdimir](https://github.com/vdimir)). +* Adjust the runtime of some slow performance test. [#66619](https://github.com/ClickHouse/ClickHouse/pull/66619) ([Robert Schulze](https://github.com/rschu1ze)). +* CI: Scale down AutoScaling Groups from runners. [#66622](https://github.com/ClickHouse/ClickHouse/pull/66622) ([Max K.](https://github.com/maxknv)). +* Allow to run clang-tidy with clang-19. [#66625](https://github.com/ClickHouse/ClickHouse/pull/66625) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix terrible test @arthurpassos. [#66632](https://github.com/ClickHouse/ClickHouse/pull/66632) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix bad log message. [#66633](https://github.com/ClickHouse/ClickHouse/pull/66633) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Negative sign in prewhere optimization. [#66637](https://github.com/ClickHouse/ClickHouse/pull/66637) ([cangyin](https://github.com/cangyin)). +* Closes [#66639](https://github.com/ClickHouse/ClickHouse/issues/66639#event-13533944949). [#66640](https://github.com/ClickHouse/ClickHouse/pull/66640) ([Kruglov Pavel](https://github.com/Avogar)). +* Avoid generating named tuple for special keywords (null, true, false). [#66641](https://github.com/ClickHouse/ClickHouse/pull/66641) ([Amos Bird](https://github.com/amosbird)). +* rearrange heavy tests 03008_deduplication. [#66642](https://github.com/ClickHouse/ClickHouse/pull/66642) ([Sema Checherinda](https://github.com/CheSema)). +* Fix data race in S3::ClientCache. [#66644](https://github.com/ClickHouse/ClickHouse/pull/66644) ([Konstantin Morozov](https://github.com/k-morozov)). +* CI: Remove aws lambda packages from oss. [#66651](https://github.com/ClickHouse/ClickHouse/pull/66651) ([Max K.](https://github.com/maxknv)). +* Introduce separate DEBUG_OR_SANITIZER_BUILD macro. [#66652](https://github.com/ClickHouse/ClickHouse/pull/66652) ([Nikita Taranov](https://github.com/nickitat)). +* Increase backoff because with slow builds sometimes 100ms is not enough to recover. 
[#66653](https://github.com/ClickHouse/ClickHouse/pull/66653) ([alesapin](https://github.com/alesapin)). +* Fix wrong queries hung error because of 02044_url_glob_parallel_connection_refused. [#66657](https://github.com/ClickHouse/ClickHouse/pull/66657) ([Nikita Taranov](https://github.com/nickitat)). +* add log for splitBlockIntoParts. [#66658](https://github.com/ClickHouse/ClickHouse/pull/66658) ([Han Fei](https://github.com/hanfei1991)). +* Minor: Make `CaseSensitiveness` an enum class. [#66673](https://github.com/ClickHouse/ClickHouse/pull/66673) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix bad test `02210_processors_profile_log`. [#66684](https://github.com/ClickHouse/ClickHouse/pull/66684) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix something around clang-tidy. [#66694](https://github.com/ClickHouse/ClickHouse/pull/66694) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* ci: dump dmesg in case of OOM. [#66705](https://github.com/ClickHouse/ClickHouse/pull/66705) ([Azat Khuzhin](https://github.com/azat)). +* fix clang tidy. [#66706](https://github.com/ClickHouse/ClickHouse/pull/66706) ([Han Fei](https://github.com/hanfei1991)). +* Https://s3.amazonaws.com/clickhouse-test-reports/61109/5cf2b53f146c1a4f24d8212f9f810d587c46bfc0/stateless_tests__release_.html. [#66724](https://github.com/ClickHouse/ClickHouse/pull/66724) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Support one more case in JOIN ON ... IS NULL. [#66725](https://github.com/ClickHouse/ClickHouse/pull/66725) ([vdimir](https://github.com/vdimir)). +* CI: Fix issue with a skipped Build report. [#66726](https://github.com/ClickHouse/ClickHouse/pull/66726) ([Max K.](https://github.com/maxknv)). +* relax condition in test, remove unused counters. [#66730](https://github.com/ClickHouse/ClickHouse/pull/66730) ([Sema Checherinda](https://github.com/CheSema)). +* Remove bad test `host_resolver_fail_count`. [#66731](https://github.com/ClickHouse/ClickHouse/pull/66731) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix bad test `03036_join_filter_push_down_equivalent_sets`. [#66736](https://github.com/ClickHouse/ClickHouse/pull/66736) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix bad tests `long_select_and_alter`. [#66737](https://github.com/ClickHouse/ClickHouse/pull/66737) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add test test_storage_mysql/test.py::test_joins. [#66743](https://github.com/ClickHouse/ClickHouse/pull/66743) ([vdimir](https://github.com/vdimir)). +* Disallow build exclusion only by CI settings (ci_include_, ci_exclude_) to avoid running builds in auto sync prs. [#66744](https://github.com/ClickHouse/ClickHouse/pull/66744) ([Max K.](https://github.com/maxknv)). +* Use non-existent address to check connection error at table creation. [#66760](https://github.com/ClickHouse/ClickHouse/pull/66760) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Backported in [#67063](https://github.com/ClickHouse/ClickHouse/issues/67063): Increase asio pool size in case the server is tiny. [#66761](https://github.com/ClickHouse/ClickHouse/pull/66761) ([alesapin](https://github.com/alesapin)). +* Fix flakiness of async insert tests due to adaptive timeout. [#66771](https://github.com/ClickHouse/ClickHouse/pull/66771) ([Raúl Marín](https://github.com/Algunenano)). +* Attempt to fix flakiness of 01194_http_query_id. 
[#66774](https://github.com/ClickHouse/ClickHouse/pull/66774) ([Raúl Marín](https://github.com/Algunenano)). +* Turn off randomization of harmful setting. [#66776](https://github.com/ClickHouse/ClickHouse/pull/66776) ([alesapin](https://github.com/alesapin)). +* The number of batches was reduced in https://github.com/ClickHouse/ClickHouse/pull/65186, but then the parallel execution was disabled in https://github.com/ClickHouse/ClickHouse/pull/66585. So now tasks fail with timeout sometimes: https://s3.amazonaws.com/clickhouse-test-reports/66724/36275fdacc34206931f69087fe77539e25bbbedd/stateless_tests__tsan__s3_storage__[2_3].html. [#66783](https://github.com/ClickHouse/ClickHouse/pull/66783) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Remove obsolete code from CMakeLists. [#66786](https://github.com/ClickHouse/ClickHouse/pull/66786) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Stateless tests: split parallel tests more evenly. [#66787](https://github.com/ClickHouse/ClickHouse/pull/66787) ([Nikita Fomichev](https://github.com/fm4v)). +* Fix test `02724_limit_num_mutations.sh`. [#66788](https://github.com/ClickHouse/ClickHouse/pull/66788) ([Anton Popov](https://github.com/CurtizJ)). +* Better diagnostics in `test_disk_configuration`. [#66802](https://github.com/ClickHouse/ClickHouse/pull/66802) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix bad test `02950_part_log_bytes_uncompressed`. [#66803](https://github.com/ClickHouse/ClickHouse/pull/66803) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Better diagnostics for test trace_events_stress. [#66804](https://github.com/ClickHouse/ClickHouse/pull/66804) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Make test `00997_set_index_array` lighter. [#66817](https://github.com/ClickHouse/ClickHouse/pull/66817) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Miscellaneous. [#66818](https://github.com/ClickHouse/ClickHouse/pull/66818) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix inconsistent formatting of lambda functions inside composite types. [#66819](https://github.com/ClickHouse/ClickHouse/pull/66819) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Small fix in realloc memory tracking. [#66820](https://github.com/ClickHouse/ClickHouse/pull/66820) ([Antonio Andelic](https://github.com/antonio2368)). +* Never await in CI on BuildReport - just redo (await can be longer) - Remove BuildReport if no build jobs in workflow (for instance: Docs change) - Do not fail CheckReadyForMerge job if the only non-green status is Cloud Sync. [#66822](https://github.com/ClickHouse/ClickHouse/pull/66822) ([Max K.](https://github.com/maxknv)). +* Remove bad tests @azat. [#66823](https://github.com/ClickHouse/ClickHouse/pull/66823) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* CI: New Release workflow updates and fixes. [#66830](https://github.com/ClickHouse/ClickHouse/pull/66830) ([Max K.](https://github.com/maxknv)). +* Fix signed integer overflow in function `age`. [#66831](https://github.com/ClickHouse/ClickHouse/pull/66831) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix RocksDB bs. [#66838](https://github.com/ClickHouse/ClickHouse/pull/66838) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Split a test for index. [#66839](https://github.com/ClickHouse/ClickHouse/pull/66839) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix inconsistent formatting of `NOT ((SELECT ...))`. 
[#66840](https://github.com/ClickHouse/ClickHouse/pull/66840) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Make test `01592_long_window_functions1` lighter. [#66841](https://github.com/ClickHouse/ClickHouse/pull/66841) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* What if I will change the test for SSL authentication?. [#66844](https://github.com/ClickHouse/ClickHouse/pull/66844) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Functions `[a-g]*`: Iterate over `input_rows_count` where appropriate. [#66846](https://github.com/ClickHouse/ClickHouse/pull/66846) ([Robert Schulze](https://github.com/rschu1ze)). +* Revert libunwind patch. [#66850](https://github.com/ClickHouse/ClickHouse/pull/66850) ([Antonio Andelic](https://github.com/antonio2368)). +* Split test 03038_nested_dynamic_merges to avoid timeouts. [#66863](https://github.com/ClickHouse/ClickHouse/pull/66863) ([Kruglov Pavel](https://github.com/Avogar)). +* CI: Print instance info in runner's init script. [#66868](https://github.com/ClickHouse/ClickHouse/pull/66868) ([Max K.](https://github.com/maxknv)). +* Backported in [#67257](https://github.com/ClickHouse/ClickHouse/issues/67257): Followup [#66725](https://github.com/ClickHouse/ClickHouse/issues/66725). [#66869](https://github.com/ClickHouse/ClickHouse/pull/66869) ([vdimir](https://github.com/vdimir)). +* CI: CI Buddy to notify about fatal workflow failures. [#66890](https://github.com/ClickHouse/ClickHouse/pull/66890) ([Max K.](https://github.com/maxknv)). +* CI: Add ec2 instance lifecycle metadata to CIDB. [#66918](https://github.com/ClickHouse/ClickHouse/pull/66918) ([Max K.](https://github.com/maxknv)). +* CI: Remove ci runners scripts from oss. [#66920](https://github.com/ClickHouse/ClickHouse/pull/66920) ([Max K.](https://github.com/maxknv)). +* Backported in [#67209](https://github.com/ClickHouse/ClickHouse/issues/67209): Decrease rate limit in `01923_network_receive_time_metric_insert`. [#66924](https://github.com/ClickHouse/ClickHouse/pull/66924) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Backported in [#67227](https://github.com/ClickHouse/ClickHouse/issues/67227): Grouparrayintersect: fix serialization bug. [#66928](https://github.com/ClickHouse/ClickHouse/pull/66928) ([Raúl Marín](https://github.com/Algunenano)). +* Backported in [#67207](https://github.com/ClickHouse/ClickHouse/issues/67207): Un-flake test_runtime_configurable_cache_size. [#66934](https://github.com/ClickHouse/ClickHouse/pull/66934) ([Robert Schulze](https://github.com/rschu1ze)). +* Backported in [#66975](https://github.com/ClickHouse/ClickHouse/issues/66975): CI: Fixes docker server build for release branches. [#66955](https://github.com/ClickHouse/ClickHouse/pull/66955) ([Max K.](https://github.com/maxknv)). +* Backported in [#67213](https://github.com/ClickHouse/ClickHouse/issues/67213): [CI Fest] Split dynamic tests and rewrite them from sh to sql to avoid timeouts. [#66981](https://github.com/ClickHouse/ClickHouse/pull/66981) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#67033](https://github.com/ClickHouse/ClickHouse/issues/67033): [CI Fest] Fix use-of-uninitialized-value in JSONExtract* numeric functions. [#66984](https://github.com/ClickHouse/ClickHouse/pull/66984) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#67051](https://github.com/ClickHouse/ClickHouse/issues/67051): CI: Fix for workflow results parsing. 
[#67000](https://github.com/ClickHouse/ClickHouse/pull/67000) ([Max K.](https://github.com/maxknv)). +* Backported in [#67116](https://github.com/ClickHouse/ClickHouse/issues/67116): Disable setting `optimize_functions_to_subcolumns`. [#67046](https://github.com/ClickHouse/ClickHouse/pull/67046) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#67205](https://github.com/ClickHouse/ClickHouse/issues/67205): Increase max allocation size for sanitizers. [#67049](https://github.com/ClickHouse/ClickHouse/pull/67049) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Backported in [#67124](https://github.com/ClickHouse/ClickHouse/issues/67124): Very sad failure: ``` 2024.07.24 13:28:45.517777 [ 10 ] {08745bf9-4bc1-4946-b9a8-c03d82ec55dc} executeQuery: (from 172.16.11.1:55890) OPTIMIZE TABLE replicated_mt FINAL (stage: Complete) 2024.07.24 13:28:45.525945 [ 10 ] {08745bf9-4bc1-4946-b9a8-c03d82ec55dc} default.replicated_mt (ReplicatedMergeTreeQueue): Waiting for 4 entries to be processed: queue-0000000004, queue-0000000002, queue-0000000001, queue-0000000000 2024.07.24 13:29:15.528024 [ 10 ] {08745bf9-4bc1-4946-b9a8-c03d82ec55dc} default.replicated_mt (6581a6fb-8458-466d-8350-89951eb1ac8e) (MergerMutator): Selected 3 parts from all_0_0_0 to all_2_2_0 2024.07.24 13:29:15.530736 [ 10 ] {08745bf9-4bc1-4946-b9a8-c03d82ec55dc} default.replicated_mt (6581a6fb-8458-466d-8350-89951eb1ac8e): Created log entry /clickhouse/tables/replicated_mt/log/log-0000000004 for merge all_0_2_1 2024.07.24 13:29:15.530873 [ 10 ] {08745bf9-4bc1-4946-b9a8-c03d82ec55dc} default.replicated_mt (6581a6fb-8458-466d-8350-89951eb1ac8e): Waiting for node1 to process log entry 2024.07.24 13:29:15.530919 [ 10 ] {08745bf9-4bc1-4946-b9a8-c03d82ec55dc} default.replicated_mt (6581a6fb-8458-466d-8350-89951eb1ac8e): Waiting for node1 to pull log-0000000004 to queue 2024.07.24 13:29:15.534286 [ 10 ] {08745bf9-4bc1-4946-b9a8-c03d82ec55dc} default.replicated_mt (6581a6fb-8458-466d-8350-89951eb1ac8e): Looking for node corresponding to log-0000000004 in node1 queue 2024.07.24 13:29:15.534793 [ 10 ] {08745bf9-4bc1-4946-b9a8-c03d82ec55dc} default.replicated_mt (6581a6fb-8458-466d-8350-89951eb1ac8e): Waiting for queue-0000000005 to disappear from node1 queue 2024.07.24 13:29:15.585533 [ 10 ] {08745bf9-4bc1-4946-b9a8-c03d82ec55dc} TCPHandler: Processed in 30.067804125 sec. ```. [#67067](https://github.com/ClickHouse/ClickHouse/pull/67067) ([alesapin](https://github.com/alesapin)). +* Backported in [#67203](https://github.com/ClickHouse/ClickHouse/issues/67203): Fix flaky `test_seekable_formats_url` and `test_seekable_formats` S3 storage tests. [#67070](https://github.com/ClickHouse/ClickHouse/pull/67070) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Backported in [#67222](https://github.com/ClickHouse/ClickHouse/issues/67222): Fix 2680 flasky. [#67078](https://github.com/ClickHouse/ClickHouse/pull/67078) ([jsc0218](https://github.com/jsc0218)). +* Backported in [#67190](https://github.com/ClickHouse/ClickHouse/issues/67190): Attempt to fix flakiness of some window view tests. [#67130](https://github.com/ClickHouse/ClickHouse/pull/67130) ([Robert Schulze](https://github.com/rschu1ze)). +* Backported in [#67272](https://github.com/ClickHouse/ClickHouse/issues/67272): Rename (unreleased) bad setting. [#67149](https://github.com/ClickHouse/ClickHouse/pull/67149) ([Raúl Marín](https://github.com/Algunenano)). 
+* Backported in [#67441](https://github.com/ClickHouse/ClickHouse/issues/67441): Try to fix 2572. [#67158](https://github.com/ClickHouse/ClickHouse/pull/67158) ([jsc0218](https://github.com/jsc0218)). +* Backported in [#67416](https://github.com/ClickHouse/ClickHouse/issues/67416): CI: Fix build results for release branches. [#67402](https://github.com/ClickHouse/ClickHouse/pull/67402) ([Max K.](https://github.com/maxknv)). + diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv index 271065a78fb..027b207d3ad 100644 --- a/utils/list-versions/version_date.tsv +++ b/utils/list-versions/version_date.tsv @@ -1,3 +1,4 @@ +v24.7.1.2915-stable 2024-07-30 v24.6.2.17-stable 2024-07-05 v24.6.1.4423-stable 2024-07-01 v24.5.4.49-stable 2024-07-01 From 8d2b804c670d0941acc4fff059859017c1bd93c2 Mon Sep 17 00:00:00 2001 From: Michael Stetsyuk Date: Tue, 30 Jul 2024 21:57:08 +0100 Subject: [PATCH 478/661] fxs --- tests/integration/test_recovery_time_metric/test.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/integration/test_recovery_time_metric/test.py b/tests/integration/test_recovery_time_metric/test.py index 8f369d7759c..628f2e744e0 100644 --- a/tests/integration/test_recovery_time_metric/test.py +++ b/tests/integration/test_recovery_time_metric/test.py @@ -37,9 +37,7 @@ def test_recovery_time_metric(start_cluster): """ ) - node.exec_in_container( - ["bash", "-c", "rm /var/lib/clickhouse/metadata/rdb/t.sql"] - ) + node.exec_in_container(["bash", "-c", "rm /var/lib/clickhouse/metadata/rdb/t.sql"]) node.restart_clickhouse() From 4aedb9d40298c1a3204bb72a3288ea711eb5e2f6 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 30 Jul 2024 23:02:22 +0200 Subject: [PATCH 479/661] Update test --- tests/integration/test_system_flush_logs/test.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/integration/test_system_flush_logs/test.py b/tests/integration/test_system_flush_logs/test.py index 2022f9d4a89..713b327eb76 100644 --- a/tests/integration/test_system_flush_logs/test.py +++ b/tests/integration/test_system_flush_logs/test.py @@ -13,9 +13,8 @@ node = cluster.add_instance( ) system_logs = [ - # disabled by default - ("system.text_log", 0), # enabled by default + ("system.text_log", 1), ("system.query_log", 1), ("system.query_thread_log", 1), ("system.part_log", 1), From 4a4bd97b4b63e495f76273ea8c12045dc129d81b Mon Sep 17 00:00:00 2001 From: Peter Nguyen Date: Tue, 30 Jul 2024 16:50:37 -0600 Subject: [PATCH 480/661] Fix case sensitivity for percent_rank, dense_rank, and their aliases --- src/Processors/Transforms/WindowTransform.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Processors/Transforms/WindowTransform.cpp b/src/Processors/Transforms/WindowTransform.cpp index a1b46c8e36c..5fad68e4968 100644 --- a/src/Processors/Transforms/WindowTransform.cpp +++ b/src/Processors/Transforms/WindowTransform.cpp @@ -2726,18 +2726,18 @@ void registerWindowFunctions(AggregateFunctionFactory & factory) { return std::make_shared(name, argument_types, parameters); - }, properties}, AggregateFunctionFactory::Case::Insensitive); + }, properties}); - factory.registerAlias("dense_rank", "denseRank", AggregateFunctionFactory::Case::Sensitive); + factory.registerAlias("dense_rank", "denseRank", AggregateFunctionFactory::Case::Insensitive); factory.registerFunction("percentRank", {[](const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings *) { return 
std::make_shared(name, argument_types, parameters); - }, properties}, AggregateFunctionFactory::Case::Insensitive); + }, properties}); - factory.registerAlias("percent_rank", "percentRank", AggregateFunctionFactory::Case::Sensitive); + factory.registerAlias("percent_rank", "percentRank", AggregateFunctionFactory::Case::Insensitive); factory.registerFunction("row_number", {[](const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings *) From 3b12fec141fda8cb2a3ef68ac96e6e58f1fd69e3 Mon Sep 17 00:00:00 2001 From: Peter Nguyen Date: Tue, 30 Jul 2024 17:05:48 -0600 Subject: [PATCH 481/661] Update dense_rank doc to mention the denseRank alias --- docs/en/sql-reference/window-functions/dense_rank.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/en/sql-reference/window-functions/dense_rank.md b/docs/en/sql-reference/window-functions/dense_rank.md index d6445b68c55..2c8617fb668 100644 --- a/docs/en/sql-reference/window-functions/dense_rank.md +++ b/docs/en/sql-reference/window-functions/dense_rank.md @@ -12,6 +12,8 @@ The [rank](./rank.md) function provides the same behaviour, but with gaps in ran **Syntax** +Alias: `denseRank` (case-sensitive) + ```sql dense_rank (column_name) OVER ([[PARTITION BY grouping_column] [ORDER BY sorting_column] From 419a5e7f730dabe514becabc6c24ec5b87325e28 Mon Sep 17 00:00:00 2001 From: Peter Nguyen Date: Tue, 30 Jul 2024 17:17:01 -0600 Subject: [PATCH 482/661] Update window-functions doc with denseRank and percentRank aliases --- docs/en/sql-reference/window-functions/index.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/en/sql-reference/window-functions/index.md b/docs/en/sql-reference/window-functions/index.md index 0c3e2ea1cb6..27d4bd763c7 100644 --- a/docs/en/sql-reference/window-functions/index.md +++ b/docs/en/sql-reference/window-functions/index.md @@ -23,8 +23,8 @@ ClickHouse supports the standard grammar for defining windows and window functio | `INTERVAL` syntax for `DateTime` `RANGE OFFSET` frame | ❌ (specify the number of seconds instead (`RANGE` works with any numeric type).) | | `GROUPS` frame | ❌ | | Calculating aggregate functions over a frame (`sum(value) over (order by time)`) | ✅ (All aggregate functions are supported) | -| `rank()`, `dense_rank()`, `row_number()` | ✅ | -| `percent_rank()` | ✅ Efficiently computes the relative standing of a value within a partition in a dataset. This function effectively replaces the more verbose and computationally intensive manual SQL calculation expressed as `ifNull((rank() OVER(PARTITION BY x ORDER BY y) - 1) / nullif(count(1) OVER(PARTITION BY x) - 1, 0), 0)`| +| `rank()`, `dense_rank()`, `row_number()` | ✅
Alias: `denseRank()` |
+| `percent_rank()` | ✅ Efficiently computes the relative standing of a value within a partition in a dataset. This function effectively replaces the more verbose and computationally intensive manual SQL calculation expressed as `ifNull((rank() OVER(PARTITION BY x ORDER BY y) - 1) / nullif(count(1) OVER(PARTITION BY x) - 1, 0), 0)`
Alias: `percentRank()`|
| `lag/lead(value, offset)` | ❌
You can use one of the following workarounds:
1) `any(value) over (.... rows between <offset> preceding and <offset> preceding)`, or `following` for `lead`
2) `lagInFrame/leadInFrame`, which are analogous, but respect the window frame. To get behavior identical to `lag/lead`, use `rows between unbounded preceding and unbounded following` |
| ntile(buckets) | ✅
Specify window like, (partition by x order by y rows between unbounded preceding and unrounded following). | From bc312eb046db07d901e208cdc1bb0abb1df3eabd Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Tue, 30 Jul 2024 21:27:50 +0200 Subject: [PATCH 483/661] Improve check --- src/IO/S3/URI.cpp | 32 ++++++++++++------- src/Storages/StorageFile.cpp | 18 +++++------ .../03215_parsing_archive_name_s3.sql | 2 +- 3 files changed, 30 insertions(+), 22 deletions(-) diff --git a/src/IO/S3/URI.cpp b/src/IO/S3/URI.cpp index 33a4939c810..fead18315d8 100644 --- a/src/IO/S3/URI.cpp +++ b/src/IO/S3/URI.cpp @@ -55,10 +55,10 @@ URI::URI(const std::string & uri_, bool allow_archive_path_syntax) static constexpr auto OSS = "OSS"; static constexpr auto EOS = "EOS"; - if (!allow_archive_path_syntax) - uri_str = uri_; - else + if (allow_archive_path_syntax) std::tie(uri_str, archive_pattern) = getURIAndArchivePattern(uri_); + else + uri_str = uri_; uri = Poco::URI(uri_str); @@ -176,22 +176,30 @@ std::pair> URI::getURIAndArchivePattern( return {source, std::nullopt}; std::string_view path_to_archive_view = std::string_view{source}.substr(0, pos); + bool contains_spaces_around_operator = false; while (path_to_archive_view.ends_with(' ')) + { + contains_spaces_around_operator = true; path_to_archive_view.remove_suffix(1); - - if (path_to_archive_view.empty() || !hasSupportedArchiveExtension(path_to_archive_view)) - return {source, std::nullopt}; - - auto archive_uri = path_to_archive_view; + } std::string_view archive_pattern_view = std::string_view{source}.substr(pos + 2); - while (archive_pattern_view.front() == ' ') + while (archive_pattern_view.starts_with(' ')) + { + contains_spaces_around_operator = true; archive_pattern_view.remove_prefix(1); + } - if (archive_pattern_view.empty()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Archive pattern is empty"); + /// possible situations when the first part can be archive is only if one of the following is true: + /// - it contains supported extension + /// - it contains spaces after or before :: (URI cannot contain spaces) + /// - it contains characters that could mean glob expression + if (archive_pattern_view.empty() || path_to_archive_view.empty() + || (!contains_spaces_around_operator && !hasSupportedArchiveExtension(path_to_archive_view) + && path_to_archive_view.find_first_of("*?{") == std::string_view::npos)) + return {source, std::nullopt}; - return std::pair{std::string{archive_uri}, std::string{archive_pattern_view}}; + return std::pair{std::string{path_to_archive_view}, std::string{archive_pattern_view}}; } } diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index efb39f90053..8c079aa4600 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -2258,21 +2258,21 @@ void StorageFile::parseFileSource(String source, String & filename, String & pat while (path_to_archive_view.ends_with(' ')) path_to_archive_view.remove_suffix(1); - if (path_to_archive_view.empty() || !hasSupportedArchiveExtension(path_to_archive_view)) + std::string_view filename_view = std::string_view{source}.substr(pos + 2); + while (filename_view.starts_with(' ')) + filename_view.remove_prefix(1); + + /// possible situations when the first part can be archive is only if one of the following is true: + /// - it contains supported extension + /// - it contains characters that could mean glob expression + if (filename_view.empty() || path_to_archive_view.empty() + || (!hasSupportedArchiveExtension(path_to_archive_view) && 
path_to_archive_view.find_first_of("*?{") == std::string_view::npos)) { filename = std::move(source); return; } path_to_archive = path_to_archive_view; - - std::string_view filename_view = std::string_view{source}.substr(pos + 2); - while (filename_view.front() == ' ') - filename_view.remove_prefix(1); - - if (filename_view.empty()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Filename is empty"); - filename = filename_view; } diff --git a/tests/queries/0_stateless/03215_parsing_archive_name_s3.sql b/tests/queries/0_stateless/03215_parsing_archive_name_s3.sql index 3a7ed0b864c..e34be475c5a 100644 --- a/tests/queries/0_stateless/03215_parsing_archive_name_s3.sql +++ b/tests/queries/0_stateless/03215_parsing_archive_name_s3.sql @@ -2,6 +2,6 @@ -- Tag no-fasttest: Depends on AWS SELECT _file, _path FROM s3(s3_conn, filename='::03215_archive.csv') ORDER BY (_file, _path); -SELECT _file, _path FROM s3(s3_conn, filename='test :: 03215_archive.csv') ORDER BY (_file, _path); -- { serverError STD_EXCEPTION } +SELECT _file, _path FROM s3(s3_conn, filename='test :: 03215_archive.csv') ORDER BY (_file, _path); -- { serverError S3_ERROR } SELECT _file, _path FROM s3(s3_conn, filename='test::03215_archive.csv') ORDER BY (_file, _path); SELECT _file, _path FROM s3(s3_conn, filename='test.zip::03215_archive.csv') ORDER BY (_file, _path) SETTINGS allow_archive_path_syntax=0; From e664a144788b48c029f56548242baaeed82a80ff Mon Sep 17 00:00:00 2001 From: Michael Stetsyuk Date: Wed, 31 Jul 2024 08:49:14 +0100 Subject: [PATCH 484/661] fix style --- src/Databases/DatabaseReplicated.cpp | 4 ++-- src/Storages/System/StorageSystemClusters.cpp | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp index 06cea65d62e..b2be593d326 100644 --- a/src/Databases/DatabaseReplicated.cpp +++ b/src/Databases/DatabaseReplicated.cpp @@ -356,7 +356,7 @@ ReplicasInfo DatabaseReplicated::tryGetReplicasInfo(const ClusterPtr & cluster_) paths_get.emplace_back(fs::path(zookeeper_path) / "replicas" / full_name / "log_ptr"); } } - + try { auto current_zookeeper = getZooKeeper(); @@ -396,7 +396,7 @@ ReplicasInfo DatabaseReplicated::tryGetReplicasInfo(const ClusterPtr & cluster_) ++global_replica_index; } } - + return replicas_info; } catch (...) { diff --git a/src/Storages/System/StorageSystemClusters.cpp b/src/Storages/System/StorageSystemClusters.cpp index 4b9802c9089..d03b600b6ef 100644 --- a/src/Storages/System/StorageSystemClusters.cpp +++ b/src/Storages/System/StorageSystemClusters.cpp @@ -116,7 +116,7 @@ void StorageSystemClusters::writeCluster(MutableColumns & res_columns, const Nam else res_columns[i++]->insertDefault(); } - + ++global_replica_idx; } } From 5152248d438ef9162845507b68e18f1d8541a250 Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Wed, 31 Jul 2024 09:25:59 +0200 Subject: [PATCH 485/661] Add test. 
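This commit packages a MergeTree table with 250 small parts as a predefined backup and restores it while the cleanup of old temporary directories is tuned to run every second, so the RESTORE would lose data if its in-progress temporary part directories were not protected from that cleanup. Below is a rough SQL-level sketch of what the new shell test drives, given only for orientation: the `<db_prefix>` placeholder stands for whatever database-specific path the new `install_predefined_backup.sh` helper prints, which is an assumption of this sketch rather than part of the patch.

```sql
-- Sketch only: cleanup of stale temporary directories is forced to run every second.
CREATE TABLE manyparts (x Int64) ENGINE = MergeTree ORDER BY tuple()
    SETTINGS merge_tree_clear_old_temporary_directories_interval_seconds = 1,
             temporary_directories_lifetime = 1;

-- Restore the prepackaged 250-part table from the 'backups' disk.
-- '<db_prefix>' is a placeholder for the path returned by the helper (assumption).
RESTORE TABLE default.mt_250_parts AS manyparts
    FROM Disk('backups', '<db_prefix>/mt_250_parts.zip')
    SETTINGS allow_different_table_def = true;

-- The reference output expects all rows to survive: 250 rows, sum(x) = 31375.
SELECT count(), sum(x) FROM manyparts;
```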
---
 .../02864_restore_table_with_broken_part.sh   | 25 ++--------
 ...ackup_with_matview_inner_table_metadata.sh | 25 ++--------
 ..._clear_old_temporary_directories.reference |  2 ++
 ...kup_and_clear_old_temporary_directories.sh | 22 ++++++++++
 .../0_stateless/backups/mt_250_parts.zip      | Bin 0 -> 265998 bytes
 .../helpers/install_predefined_backup.sh      | 27 ++++++++++++
 6 files changed, 55 insertions(+), 46 deletions(-)
 create mode 100644 tests/queries/0_stateless/03214_backup_and_clear_old_temporary_directories.reference
 create mode 100755 tests/queries/0_stateless/03214_backup_and_clear_old_temporary_directories.sh
 create mode 100644 tests/queries/0_stateless/backups/mt_250_parts.zip
 create mode 100755 tests/queries/0_stateless/helpers/install_predefined_backup.sh

diff --git a/tests/queries/0_stateless/02864_restore_table_with_broken_part.sh b/tests/queries/0_stateless/02864_restore_table_with_broken_part.sh
index 08313e2fd3b..bf76727f76f 100755
--- a/tests/queries/0_stateless/02864_restore_table_with_broken_part.sh
+++ b/tests/queries/0_stateless/02864_restore_table_with_broken_part.sh
@@ -5,29 +5,8 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 # shellcheck source=../shell_config.sh
 . "$CURDIR"/../shell_config.sh
 
-# Copies a test predefined backup from "/tests/queries/0_stateless/backups/" folder to the "backups" disk,
-# returns the path to the backup relative to that disk.
-function install_test_backup()
-{
-    local test_backup_filename="$1"
-    local test_backup_path="$CURDIR/backups/${test_backup_filename}"
-
-    local backups_disk_root
-    backups_disk_root=$($CLICKHOUSE_CLIENT --query "SELECT path FROM system.disks WHERE name='backups'")
-
-    if [ -z "${backups_disk_root}" ]; then
-        echo "Disk '${backups_disk_root}' not found"
-        exit 1
-    fi
-
-    local install_path=${backups_disk_root}/${CLICKHOUSE_DATABASE}/${test_backup_filename}
-    mkdir -p "$(dirname "${install_path}")"
-    ln -s "${test_backup_path}" "${install_path}"
-
-    echo "${CLICKHOUSE_DATABASE}/${test_backup_filename}"
-}
-
-backup_name="$(install_test_backup with_broken_part.zip)"
+# In this test we restore from "/tests/queries/0_stateless/backups/with_broken_part.zip"
+backup_name="$($CURDIR/helpers/install_predefined_backup.sh with_broken_part.zip)"
 
 $CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS tbl"
diff --git a/tests/queries/0_stateless/03001_restore_from_old_backup_with_matview_inner_table_metadata.sh b/tests/queries/0_stateless/03001_restore_from_old_backup_with_matview_inner_table_metadata.sh
index 8d987dbf1df..2c70cb1e3be 100755
--- a/tests/queries/0_stateless/03001_restore_from_old_backup_with_matview_inner_table_metadata.sh
+++ b/tests/queries/0_stateless/03001_restore_from_old_backup_with_matview_inner_table_metadata.sh
@@ -5,29 +5,8 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 # shellcheck source=../shell_config.sh
 . "$CURDIR"/../shell_config.sh
 
-# Copies a test predefined backup from "/tests/queries/0_stateless/backups/" folder to the "backups" disk,
-# returns the path to the backup relative to that disk.
-function install_test_backup()
-{
-    local test_backup_filename="$1"
-    local test_backup_path="$CURDIR/backups/${test_backup_filename}"
-
-    local backups_disk_root
-    backups_disk_root=$($CLICKHOUSE_CLIENT --query "SELECT path FROM system.disks WHERE name='backups'")
-
-    if [ -z "${backups_disk_root}" ]; then
-        echo "Disk '${backups_disk_root}' not found"
-        exit 1
-    fi
-
-    local install_path=${backups_disk_root}/${CLICKHOUSE_DATABASE}/${test_backup_filename}
-    mkdir -p "$(dirname "${install_path}")"
-    ln -s "${test_backup_path}" "${install_path}"
-
-    echo "${CLICKHOUSE_DATABASE}/${test_backup_filename}"
-}
-
-backup_name="$(install_test_backup old_backup_with_matview_inner_table_metadata.zip)"
+# In this test we restore from "/tests/queries/0_stateless/backups/old_backup_with_matview_inner_table_metadata.zip"
+backup_name="$($CURDIR/helpers/install_predefined_backup.sh old_backup_with_matview_inner_table_metadata.zip)"
 
 $CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS mv"
 $CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS src"
diff --git a/tests/queries/0_stateless/03214_backup_and_clear_old_temporary_directories.reference b/tests/queries/0_stateless/03214_backup_and_clear_old_temporary_directories.reference
new file mode 100644
index 00000000000..3f3fbd9ab58
--- /dev/null
+++ b/tests/queries/0_stateless/03214_backup_and_clear_old_temporary_directories.reference
@@ -0,0 +1,2 @@
+RESTORED
+250 31375
diff --git a/tests/queries/0_stateless/03214_backup_and_clear_old_temporary_directories.sh b/tests/queries/0_stateless/03214_backup_and_clear_old_temporary_directories.sh
new file mode 100755
index 00000000000..e0c8f08e695
--- /dev/null
+++ b/tests/queries/0_stateless/03214_backup_and_clear_old_temporary_directories.sh
@@ -0,0 +1,22 @@
+#!/usr/bin/env bash
+# Tags: no-fasttest
+
+CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+# shellcheck source=../shell_config.sh
+. "$CURDIR"/../shell_config.sh
+
+# In this test we restore from "/tests/queries/0_stateless/backups/mt_250_parts.zip"
+backup_name="$($CURDIR/helpers/install_predefined_backup.sh mt_250_parts.zip)"
+
+${CLICKHOUSE_CLIENT} -nm --query "
+DROP TABLE IF EXISTS manyparts;
+CREATE TABLE manyparts (x Int64) ENGINE=MergeTree ORDER BY tuple() SETTINGS merge_tree_clear_old_temporary_directories_interval_seconds=1, temporary_directories_lifetime=1;
+"
+
+# RESTORE must protect its temporary directories from removing.
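+# The table above sets temporary_directories_lifetime=1 and merge_tree_clear_old_temporary_directories_interval_seconds=1,
+# so the background cleanup runs every second and may remove any temporary part directory older than one second.
+# The RESTORE below therefore only succeeds if it shields its in-progress temporary directories from that cleanup.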
+${CLICKHOUSE_CLIENT} --query "RESTORE TABLE default.mt_250_parts AS manyparts FROM Disk('backups', '${backup_name}') SETTINGS allow_different_table_def=true" | grep -o "RESTORED" + +${CLICKHOUSE_CLIENT} -nm --query " +SELECT count(), sum(x) FROM manyparts; +DROP TABLE manyparts; +" diff --git a/tests/queries/0_stateless/backups/mt_250_parts.zip b/tests/queries/0_stateless/backups/mt_250_parts.zip new file mode 100644 index 0000000000000000000000000000000000000000..15310b79054e09c75876efd20845bbdabdfec2f5 GIT binary patch literal 265998 zcmZ^LbyU>d_qBycBi$g~(%s$NAT23KcS(1Hv~;I1q$7e1-7V7HNF(_UqxgN-`pxq% zi?u%M-gC}A`|PvtD9Jv3hVkghix-bx^WJF%st<_?m_2%wHSzor?4w7I7>$igtex#v zfqS3Ho1zSV(_TH{Cl!!FHF@ii#D+%V3LtZC0OSLHxL*cIVv#w>nQ$yQ9NY-Mexva= zsnm9PanzoGNa2X{s=+tY_fqig;^!Uf?MjvJWrObx+4Wh-Y2Sj^#lqdqkniWShC`Fv zt)Z2wuDh$4yI&2hH(Rg4|7q>}`&`gitX1627ys_E;X*!oVJD{b#`>!7BCnwp z5NcB2diVRi??vB&kD-F#E%2sm;r24*Y(wzQFHxWDrtP-M=x(pd_pZ<2u5aP?YQg8~ z`%dc(_%D~lD=|{P8w9`joc_H0cq=tm5Ofjw6)EO=o5_KTWB*C!Q_VN}HsCysUt9*{ydw3%9QbE_VLxz9JxlSYTvsU!JC2` z@GTqMuHN1FMi*U+3KeMYb9n#?$IN4@M!K6x9wYzqd~&&S5hDl?yxa1vYbCV2Y`wdD ze|ISeQa`>UtsIf~pxxGH{UOCPg}U%=IGH5MbNTY%t^oYc$UzsxB3q#WZwIY7$}x*8 z>xCR;f{*Ji|zH3p5wOmh>kr;4%G9~9dp16!nfsKEx!}JTUXy@%xiBN8V z@O%3F_lUc8gu%JRn7{d{`_=AeLkwC54`Gi>FR6bLJrB z?06B@h6;tTG{nPhNw6&-;xrJJoAH~7rHb0yWYb|svt1P#wW!g(EaMs=EcT`*QR4jb z33_?etnsa_>lnFR;mqr_gI6Ka@eBv(GYQ2t+^52C~kX_T-~!NUptBc=wE5O zX;fD5-bOq=9S6~H4uz?xZ8<5{OwR`(oY0{tR0^n(3~_i3W{|BI?sApK*UlvzNAaZ? ztkl6u_HTxdnhiVxM5h*}+0|IIRJ^&SKPMdXA-c}qTWu3RRYGCBrgF2Z$@*mox)@nE z)o~*KCek!zl*cp@v$PDsA~?*Nm^z8Kwb8%{>#xx#(UyWJKEp0Yi@6@fu-Lcge2rR)Zm}I=>~Zc+ek^5f_3%-; z7&|I5-1Gs1j}XwC*Dl;jeTO=-6q> zlAWXGomf0ILXOomjq=rGW+;HTE|agZm}t8#=dN%_+fL zpsgBb5);jPpT;p&wODG~0Esr^_+_BW@bg;E@I+1pFZ-eJ1+tl;4+!t5p5B^z6yN!@ z3%2r*<`g;Cx7{9}W^C}o1Y7&*t z4e>_ve`8;X-)|$tZi~avU{-N@L87F(lT6ty592|GvTU8@EapAt?Jz7MXb*9hAfGI_5eFaDaWW<8ScVASYR4 zAM`*C6Q*+rOGloHHO!wiFqb_snIdXRUWW!R>xmRyS=zi-P9Kop4fOVlmqro@PN)9? zF$4E%zlMw${!gkqzy=k3ipX1CTF$SRUpMdT9*i#@*ns^_7~N2+M`Q0Q3iYk~KZQ|a zyH4D8)O~^#wBU8OQ|2EJK)8b=(km}=L7y1kt_eY;6^R#lRB@V!$uuwyPUD}Rb{~A! 
zggYXY;d_K4= z#78&{A;@{$w=1i&HJM`D-}cV}-ZDM`oV#e=!z+hMMLe2or<8 zv+lW5OF-8w%A!|OT^<7N$5m;H-ZHGg8rq%ZfrwbY316kI+u0;_ zP!;a=pCimj?0c&6kNLd6hP^l#@Gv7Dk&&3lGhOV?(slAPQ8}T|w(AD5x-2~2+yBP7 zsfktIOHqg}8`jZLW*Hn>cNg=1@EokBrYfQNor5&um`60bY8&6AK}&7`Zj@taz(S>rtdpx!qa zigkrD8kyR&Rnl+@G?bADPC}+ipw~&*|7~PLyQOnS{ftc7IZDAgrDwK=Bl#A3+5bq) zV%aT#Jnl%!am3=gc+p2Sr&8_a2b|eLORk`jU{WUg9vJ7w2=Lg)-2q7wLgyDNu)M4% zje=$|pZPwkNR-YyoyDa+RHtoHhiMVKnS0?Q0^>)kYRgvJWGi3(vY3;7W)M6vEGOP1 zDSd_b9vXEa9ME7Ebs5@){ot1<#FBZ+M#~h%BJYse{#{yjJo@3nyalGi`%^Rny4G04K1|vYVd}_R>rG3oEoOA9}hlq21R)N907HGK)hk_OJTlP&6LW6}Q zYEzV4f0oL}2)e2H$2RvZd5#vYVFY#E{>#L-c=pF-sPzP4#7*<}WJyaO9F|v}{flZk zA34xn6rVarv*7}s=v{AKK_g4r>{aA!%x@>+d#Sgyq}L8@(oC`_9{5FXK;T>C=e5EG z`4^AJ3pr$8k2D}MaAIyoK~RQMd1ZXMprJ7`X*v~O%w#jsv&;U_IUgFDnN9EwScoFl z5M5~n-^gU*26`FF!vB9VD zCh2%JU;A5y-FKe{(l=-NqR4KKEAKqoVKYw(Z!qtS+~>Z@5E+;=XuW)=Vddi% z@^MmAcEt(Qa zVxRbxXMCUf<kLe*G2pqQ?>x^iB*=Q$S9zJIp9vxHHRRaJrpa-5iG*v(c@x<8Pbc z)vb1|n(#n6CU_sBfs=eXeclv;@M__{YNYO2xdIz?7r(?dE2Ad!(e64>nuc&4#MTOh zEVH&)p=+cd0h91FSH5lib~7TQFAk2~|Jybjbd(UzW;{$j0i=5Ks>uok*e;LC&WS$V zpQg2QYtAHk>Pv48%Ba#PCIY$rtY(9Y|_Gdmun&m!U^trfsW~K#~76>w8 z_E~3~8NlV;XnpKL~`ty07UnHaP(cXnJegA6# z8MW%)w?*qjyi~(j6ln9-OFd=->g%Gj*);Nn$`N!&X$n-;K*4iu-@~aDs)(=0y=6_Gptntabp|EDwmaa%1!Vjq>xJ8M1??{-F8wDarXa7?3 z!GMD1!*^n&Gv@wNORmpH@>cb}K(M(O%*_aaY~9&;iD@gPQIoL|VzS)@Y(Ztj-=WQ9 ztV7~-BZkH<=kGu-95XyA?IKE zYBn7&`gy&pZxtv@gyC}7$d+!^k|*}={nt+^3wybgJr`UXhHl&9+}u|1J8UwR$Z4*t zp2R!$9#OUPJ0Hdgj^uLED{E2qOPq*RLM2+ya(19{n!>9wkBsb&VX9i`QY|kR3%;>K zO)x>x$)Kk+F#DHtffgY(>e&dzqmzDyuL?ix2Rg8M4l8QF+gt=m+juD zABXglz6{1Jh6`w1BZGmu%QMG07AM3Y-pqzSHEt|XWA&ey;WX*5##)~oGr(|M(*kF) zpB?m*9>O-klhZyd86f!yOtZ3$48JW$=_JvV?Y7t6<2z_GO|56Yl471BD0jXl)lRxZ z-LW%e(C(oZO!(J6CX5kB7_?2w$<0o5$Php=ga{2Jz)p7*P!bB%L^1~_PfY4)N^!b}<|egw5#4tpWuH#DH(>$Wu8;?o(TVU#T8BMA_9 zj5kvH*FAAts10v6uU`6GI%R%l)9PVW&yS~74~j1Q{!L1a z(f0gWminh#^%rn;no^=5x7HE45*B;#oxG;dIoze$J^^jyLL-9XmH~@%1oFiq?^&he zuriktd5#(z>n;d~|B67NUIcF^^8HUrLFoSZGtuA*@=^thK-piwvYbd+$xb2>CpMg+gDk%)6=gCtGUz(~l@HuihYd8l8BwhgAC2|>X{ zqPNWiHPs>opP8wz&avKnYlIjI6=9q9o3n}-pmg)y4)Lr#&0exi;9IImj_hW z=)30$Cy`f54E{o+&nBe9t*}N%=-4t%T@yq12F}rsTgKKYhN$6FOe0@bbbliP8Y~re z(#SYAku6+ly}sSYF$ZeO!Ue9#s^{J@l`GYJ zEFQGwK|8r;X>%Xm4_DE1(S~|l$0eCXc%l+3t+;#_pRkLD+}RSCq1(ki>B^_1jS}_2 zvQ)4aHYYlE`|@A?%x7l0mdC|HEbC>_AWKQ^s;PZW^50}Oa7Wd(>7r|7`K;KNq4U~y z)+w|I`4qyOyrRb5hsdNehp0Ygi1*l^F0nexSABFb;|Qi`|5s;R52F@i#es>9%@uI^ z_=H_k<7p_o$Uo}bASxYRLK+ZJ?OqZfi@8&4$0Y|D0rRK0cG?IEd&WEySGa3B2_+I= z%O--EMr$MUDAc+XK0=i>tJTVoV2%7p9Y0{sD}d;nCf2s#g( zMSytfCn(~4zoW8fiZQ^Lj>f2uR&+W+w*Hy=|45*o&L`H5sayS69sJ^fTq~5+jzGw* zO-^y`2_MT)a=%-_EL|If_wHZuDjPpE{&@q1Hz`(RD+;`D2*?PRH21f^>(_Eh%T|97 zfZqs8eZT4_{He_P$j(fdGj6lD?RAzD1euk<_}?5R@FFR#qoWd#v?5m)zg{CuqS(2t;@)l-TnBTQ$&D=}-I zf1TxX>&XR~p^~9`ZVG2S->qgEt?)+82G$X}ps=s#B&a8YU~^?ao$$$vDpf zB~E8be?vop&VYf`(vw1D?qZJ(jkW0@`$3K7xra@_1mACxw!%1D^LLvSn!=Hlv9OL1 zoffRpTo-*vp<4T)7D1$5hEB68Fl?9TVQ&R}t_jmHlFA=@SEh z&eVW1gLxf@fepB=rq81XgPgY^AaH_Lq^K_Q^jn&mNFc2Mf-63tHuMM16o&-Vk!C*p zvD1sH$P$`w)r547<}#`i$5O`+wRy9+YxIq+{Nv`A5{*9i(Ts5O*Vsd8clS&KOlU5T z+p-El&a_SatC%x~BXk3ka;oE15e;t8C_o@ALRnsioB&a*L&Z3^WKb4UZ=L_zztgZO zl(T!Z>F`M-6FoVcUr8o+e+_}>wrji%&4G`l_ARfnk$EJqo4D8t#&MIXDzL7h@g|Ld zkZ0+J7iL6VI})m!@ri|LC}Gmx4_3@?TbEOf ziH0H!D?U}Hy?Gh! 
zC2d`Ht2q7#m=?084to_tOTZ7O0x|NtM+Bj)e>f8uV-&^nfRoIs)<*3aTn&6H`cKPH zKSpDx??c@+DK@@Ni%We=$-w{Af6X#ps3Pn{?Iank2Kd{@*k=tmT9`zQA>K^&Hq#;n z4Us;~y?ORt2&UR%AFRSkG!nOR@&TxyGwTspi-ZJn8}K%RH)t;FS1>kY=fILI%T~_Y zr`P9Z{9`wuhOuIQLdwLH9?Cbbl-a0KiVSA$?g)qXk-Y+Z8bUavib{Lvm`$)!yM9#J zrkRxh{;V9pD{oh2Itf5*E$Jhysd7J{`ilbsMefRi-_RnXO%W&aiBzDbw~iXu=}V<@ zu7=%4;_-t9kmbJ~7gI6DLWNl)<=9V4Sb<2u+y@!YJHXA^G8-TEB{K0L(htOC57`0F zVD8u|VBi(B(TwlqTGnzQH;OT-Cy+HZ2zbhvtNQz2Rc6c^Zr7AZZvm2cnpQ2onOh^} zk|aa88CX3(AY#o4b(T%OutSQv&}Ho(@$5qC0-!WQ6`YQf0&yG(Y1VCLIYh7dx(<1S z(YC~*9+IFT-D0T6arMnuf2`+hm|TGXoZmMP1e907%t_crjn!}~Lq@zZ|IY8(dceyf zq-F_mdX^>((@)Q7X(V?>r)67bP<%vwbOx-kD|l9GPv9M2xjmh6Dg`~0!8>=)GYbAM zg>KGdh9s2%KHzND;eGZ@_~*$C-d7KbMsGlBbWCM1ugZ87pNP0puAZk0579DYIZq9S zS1Xx{KDT|(nb?gn**qctAR8|^H9^ra-7a)YT7YY+8* z8bIJm$leuXV#aTV7Qs7Rr)>B90McV1<4murYQTt$d{VwbhITYMe?B(se=X_8Hn?4% zXK$yr|KjoWG%49DEa;F^AQ{*Jku%as&Hn`Yx_APw>G4CbF+9Z0*fZR$2b1< zaaZ$dLv;)IcaLv9YqOXQ7vVGyqJqZqhY<=zx^9z4;^e>-@Q*r!bwcEff_FLGm{l6n zigIcufCaGOq^9do^@mrX(zG!nBeS1`_I&n4lxCep?! zL(5;O&MNQyu%Ow0CoEQ`MGybViZrbP$@pnE-W1jer1}rR(<~-ByVkl>2f;aZ1eDx&O-;ux)E|(Z2xi5k zA$GIwf6O7_93c3(_W+rLTE-bA+M%y?Y&jFsJUQvKT9S%MAUSYko#FesQ{)3kC(0j@ z``edj4xq!fm`W9NVE$!T<{r)BoIKk1C?Ib6{kAEdGWP!tGZ-{33pYiafm_ zAoua&AN$X-b;g2t#p%3Cw;eUsO4hRL;tKaxjrW^Yw}^9S1ArSUT5@tCDr@4iVsaog zmE3&EWctItnMp1{IogJUvu4Gw46&<7OPa@_$Kk;>QwD?1&JwP}Ctk`T<+E2d36E4_ zXXEG(HTsDKn{tunxO0*#e4%)Saj3tzvO2H+URV`hXuftG9;X6~P;Y)%*1&Vq4}1_v z1`EJD!hJnDM8L-^Is?5h7hK$k&Pre18V{ibr8Xi>YMg&tUZpOetMe+SO@RMWgyD2~ zF!|t5^li^^a-}ssL<7*~!Lxsn9hnT4E7>sQL*RYWWB@L8j1fbj;!tKUB6_%=+$b)0 z>qYSjG-Td_EmJMkHlCRb$3+9k5wSCczd~64cNnhlBmp?duufFN`jN89cR=^v-4)3E z?+unRS9z!jJ&(1_&7M5(dIoeYsrU^FP=zKj;i9N)`~ZCHMHb3Z;h$e(${b#AY)-cM zD*+FQ+xRD~i{VDFQbm#CsM=tKv}-QNUj}}Tx}TTKWfvFqdDK*U02>(fG{YVok=H*# zAevUmx=&BBrxl)G_nau*-30pj-}uC5*?sb0J?05ORej4XkVJZ*49i-J_s=$?@Z)Vd z4;2|!Wy+Vyj7LT%J)`BbD)HwvS%u)x0JoYbY)JATG+hnG=TYmd0ru3%2;$T)PiOupGdS3Tei9aT??}F{Ye>*hE5CYmndlBFOz^<` zW;~j|E`pMKf1^lwxgv@Y|@(230bWH%7muZcufD^>!2u~F%vFS`<* z%roqBJn!E`UI+$+Y>VztG;R0g~VP9;f@ zkd*2Q_LEK|aU{ppkMD#{@)oeO7jiRLZCiM8s=vP_4gFV zXx{C=H)i0@w9nT}K}<`CS*#wc)0-c;&o>5fG~CNFwz>NNX&wQDx~)`|)aOW<5)<)N z0zS}~3>9Cf^6>lyb!y z>|FcoHd*(_?72k#KAfF1_Te)vN+Gs8v)sr4bVgOz$#{!o$-xyRPa-DHoVHiDZ1{Kq zdF8fdn#Epa1*#imTV;w{IpDlXb>N@+$%VYaYsAy4x>-}1L^dQ-^=32pP--mi=dno~ zo!(6F^~V$!$GIwttBZZnQ>hK_U+3rZA*X>?xhFuD)^ZjFkui&`hvpp)jEy6w5D8HBS<~NExj5AXRW~`>%`p)(zaFZTtNaH05hKZ0> z!~Oa^$*- z5rj=Rd}hwSbYcun#&|BBzNeE0T>v=ZA|F%10}fsQFdAGkaxvz@wHhf5JQ-;T z1b<7Ij=wObjZ5gQ5l%zPxBMTTm0@t=0;AQ0fP=J|%kxjU*IlYA{$p?M7{6Fu?7rt_ zJ?nse`TMA|e@2zD;M+eAP)wi2a2{9vQUW`gB@gz!a-&VM800(2U=aBHg*)93vocUs zZdh4Y9Q}A^b9t{5S_fEayMQ~RPjj>_P7Z7jL??HKzY`*Qm}ZVhC@|q+@sf=zC`zC1 zQ1?)*Ykh?58Sgdb(+?o(2@9AB*fsKaQ@7-laoQ1Q124iYXkd0^_>vy|F@-2MWu-%6 zn21;`|ARpjOw7CpKB_rps+jTWPo1s@JI7OWA4AqZkcr8PuCOb-Uya-KS@-e?A!V73 zq4d<~HB^pihvxYb^H<($OLbV{_n5Kg`)WrIBJF{|AAz_ zcv7d)_5lPra+$$hWq9~q{UK?Y*UD;w&PgZlH|FjJVMXB1gWF7-t$bz zWcC!SZIk*mVRV+Jwjsx{QrZM2}mZ z%!B{q*9SA!aESm7-~^n&g9T&?rM}TL)UmJjX+b!^*!ED9TVc}qkEI!$XI5?V0nZaX zs20xzzi=~!O~dtp=+!{U+s6)MP!VwJj?zx%oT)qUwu%%p4-kegS$ zZ;DB!#p(COpE+i!K}L*zh|$Prcc_<#Ce8upg`cpNyD`%Ge<%!>!mRWO>O4r%pDdDA z0RBvdAA6lJ4tf8+WvECyU~#}VHLR_jf*&wC@62mzvv9|~gWa1Idui;f zv&|k121&5iw9?|Z=Ka-;u+rr00O~MNNj*b|&#dhdZ$nXA#$P)nY9};WS0Ynf*zDAD zm=XGGi6J+;V%7kHQ)A-|J&%dh@Xc(-!H3%aDQ;LxC6xJi~K5o#fHQQ$OE9V63-g&35-`#w*BhIf6}u5+G+vgKxvj=g}>mt+Q`Z#aRmIsM5YZE zF>z5HDLOX1MRwkjOT*wxH>6hDL_)X!60N9GZ`=3Jz^BN>#4pj3mJstQ4g912@hNaT z3Zf}vbbgm{?yqU!O=siAA;CEW@8p@tuo;2}WS%}D&=|w9RI0bN(UXH}V>=bAJEKNk z-1cAf&Tsd?*i2Lj(tjxYhKRHpKxD1L#Kjn@J=a+j%~JlRsx9p>_z@F0k@uOO5bA(L 
zdd&KF^NuUixK>%IssBTd;eL~AtVxU5Wt)tVyrsf5B;fI>;_bsTgCtm7l}}{KPeHp# z%|&3@c~wCs9S9Lh!SmQ;fLpn+@Zz%%lRsmnOO<5aS5EHy$I3Sl_OWbY*l1A>JMS5h zMs<9iK}pHf=)+mogyGNE!oq>ZLH6%-UA;Wi!z1-ZL7DInrzvpKFM zGH5ZXZ1=vkv>ky5Wpw_ToyN?vLc}P#=*FpGA}gozZEWJ-&=!lTEmn@10`adVbn=Le zY{x`GwIqwpA8>!)DU3QWuSI2RJQEfypX;zK8|81JLV*9?`j-_cHrBynurU4N@6k-7Ircm;=UV=zvMZ$Ax3E4e!cxMzEd* zua^6qHBNL^`C;<2)!b{NXv#|}3FA(2z<=|mS@Ov7fyk7@%4#t5n9+U9lR@|$7CG+# zK+gH}UmuwcDO?ewSb0}rt~vSgG!>st;TiDXL=JpuKQ3sImiIRI9IV78C{Q?O z%N5cZ8Hse_FqXuS)!B`PY_Pmq^-(J}Itwm$#VOPy`Zh0=l$i76`yjqQWe>221=Qat z6@zU{BU)CWt4Oeq|K^dI<&?e`)D5{J4pWf?@(hh_48X;yuU52C(H>*zezkb7`y99^ z_sMYc2(EU!koxoG$-mt8XL7)U(6W1Rc`;O$y#~Q5O@KOM)7fr2#J9V$BK`u}GIlUN zl(f4^F1>(`Zb_NfsxCOS;2S8D;jbTSjy7z?w52i7DSz^fb=nBgy5E}LYBc0CVB?>F zsDWx_Vq@@jf_Ul>^l^9_oJ%w?#S3 zbJhWe0}+b~gll1cJOWerXXFE~{XCQ;HlBBNmBo7wq?p?e5y-nmwX~KEL0w{|Dn4h8 zDQ;)biQh+)UnDsagX=pa(|kyPxuSq3QT%e20#z@?lsw@JDl0&q`NNaU6G!Fa6?b0@ z+Pxp!hD^_x*!?@dS3=*%DORMNH!`ML%dc^50jR&|BfhPG8>@b(|B5t40p>^6#)XT| z7)Rhgui-7^H`qfe#s@57mIrESv6g(s8L;O&{5F|hkEZeEKc4=132atrwr$&ifnFvW{up!4|!b|w{hqnSk@wjyK z2GNLp*RI*=f3E!V_9t|>18UT=&{?rclpn>+W#RYy8!ffB$rL{cmAID71hN_@Ib}4O z7R;$-LtKAbs;>OB8a>Zgdoh}2BMdOmge;h&{)1#Fx>W;4nHh(Y^LRK{R=W2`OL;<7 zW;q_pfB#t6NqCp*5;ek>LRuy>k4(&Hivw~Ru)KWh6vmN*`YLXPUN0$RtcE*YZqwmE z-b}1}E2mgWF_-c*Ye&`RYpPqma9{kNxp1yIYz9@9mVf5k(ukWV{^)nfE{>mhQxANm zl}a_$YF4|u#-vH;dO{r-D#!ZS$>uKQKSa6X>cvn}3(|4z(4^o_u}-7%|9O~p!>pEO zj}pC{XrfGow%f%ie>Bp7{nEd>1ntkRi!07}0x88jURui$@FR~Wdh8Oo9omc#r)Tlq zJ^$I`CrISAeSPptv3enw% zIjV()nPLbA&*u6=*5zIIo8O#)Ia$Fonxf@64dLZ25(*t5@fH`S38*GxGi<=JS`2nq zk9wX%Vr8z0DQPDM(*vR5MOfb+C9$M+j*ev8sUfG3m7f)ndVreK>@$^Mc&i|PGEIU& zy&O^2_mlqtHB@{ry4A!mjTzGmXO-71)P>exnEoqGZ8UO`0_|BZ0@>y=8H_EH<4HDr z(?8-L7-&vOFiJZ#;LPRii@FF~q!}2F#&X5KKz{|;8GtNS@hcQJ!;qHaeBP0b%6oPH z!I077!^HDmj)B!|rvfFNjhi-CiHHUl!Zf7rnTA(+cuGmWzTL9llNm2l8l+{1tSJE$ zXdad#mzW;p5sc2$67KyhW|@$PGqEmPO8p0IX{~L`bUY8cJfQ_~>)3ggQT$fdN&wQG z!K?Bcz462nPbc5?fZ>kml|d6Baq*Fe8>l+=lCbqK)vB%4fVCRpfq;q~M{P;)12;d) zj?|`7z%ttLu7^ut+gNd=JD~aFVff9iAP9E!H_!l*dn~l_O*fy`C=p7Z7(^1Mv};@sg_W2K>k!WVq1m~q&P72! zw?}2gmHl*Hf8|J!rdkUfxMc34=z87{NEq~by3#DW%Ox$)1o=SNOgA$p z(m;B3{-}1bF{ZTez=?;DfSC;Vy(BvAY{*E3;i!&C`1i-tT@c+D{ER^xC7zXpPSe$* zE4YHWO*)sI05|8&DfE!EjX(DPyp@Qixj9Y^LgkmKNyW>1u#{C26X;1|`{QC`qC92d z=zK{#&H)Dr`#9L~bLq98CBr|d_-K7T7*IT{T%9d)93g80U5g{lW@EFX=l5u{khg%Z z3+#JCHN+5q3UZp=gozbde#MB5wJm!YJzjm zb=miuFV#-w8#H0k%a|V(BvOpZebn*BRx}^O4>L`{4nf6XYgYsvSz(G7xjaZ8XRH#W ze%yUXYUlL^KYcfE0h(t;&2o}yQ}?FyOD52ef7c0Gu6?Q-6ME*j_!w*zxHGyezPtT! z=YMni3b-V|lHr;`btDr5d{U=o*d1GWI3%q4KL6fejj|4CS@VAM;n+!?FiOH&)WoSn z=VkL6=Oy?R$H=<|*Y1rra#K{R*P^0B-$Py|(el*seaTu0wcy>!G{o0_&+I+sfpGhD zQa9h#Ho-NlcyJ}~jc-km_U#9bBVzWN$zKxT({mmO$1}W&w(ZLR=gDA7oXb#stu8!# zORKf0TkzAgxAj+7#Kk?-X$;CI3hY(CCBSlK`ug=Q;OOOu%Or0b$LD$D%feHvYuNGz zsrOn#z9LH2)1{X&y?pCKv;v|xMaVy9@M(-)cNqGu>G+j-*Jc|hALHcf!ilu8Atf^H zLO#Oxb5GTfK17)Q)gT$6m!CP6td`e1*lm>eaD1dQgC11V5|g@e3w87UcYdc>TbWg! z0%~Mk?qx3i;r#vbZ8IZ&uMqt>RXX-3byKEnyOQ)VYm06r6e)EVp865n& z9Uc*^KFmsFom#~gW`x0*i(&vq-w$y=b2Oa zd*yv9{*RxDZ;+DW${s0uW#PD-&mJ7Tk$fYuKP@u=VqZfbo{QU`7mI(M^ z+d6c@!|h?5J4kcog(R#tK}~~m={TFmC;%leggZf@`WAH~D%OGJ6ISPH8F+{QdT=7o zRWvYF%~|oTi(u|yl%|93Vrwl&nPOmtMl|medFfq)=DX5!JB`=rPuUfFoX;H1hWY&C z1ZLnJBNDW#5WyUNQPwgVfJ{LBw$ncqj?|})B>*t=%YpRnXj=LB!%3cdbp-ugLVcfMJ z&%0T3_sU)UZA^in6Zsv63ly#Qaq~YC6#8peT;jj^YBXtwjMG*N62+K@a1t)<+5&^z zZT4gqjeF2VrIkfA`zew0MRD@aPat1m`?;5(l}jomm(%fbBQv4rosc|JB*VI7U%+5! 
z90DY1h=kf`=g#AWGqB{%tuFXS_;}F&5&tejGIbB|A26tAp!wgy{&$a_-7Wtb3%8SB z6FTxz0Y@E|G=vm7H&>~?%JU<1Zstn#e-+|f{*BZNHg+|C6Xw_b@Rffr4FQH$=2SIS_zm{&ANXegn}2^8 zf7r+d*3wP#0G@194QuM3_0a=wyzn2vM-8gzkDzJMBM)##cC8{Mq9iPIWcD_Xt<=&b zRp}uRN6Mjpk_Irc6DZ704LxlOdu2fmtUQ&g$R7YL0SmCQj?8V zt_?`6PS0K!^eQ!E0SumOPJ=Zy^rr`3tsFR{QK7bs22WOq zyZ@Z?$}J7Bv*=rg8bBB9BXW783H7(jn9lFHzai|@b)_l)tBJz?P(58J4%?qBEQCE^ zS3~8{3TL^{tx?<;ot{gR7P+5zph=MlMjw;SULk??+vO>&u z3sNY@*1^%$!v-`CM96S-;@(guZg61-60xQ(AUTugE#jbIM>$f1Dhm7Py1lD7Y}ipg z)gUbde&k?DWlc7$Ql%TyndpK(N*=(Cb(E~Bq1UW9Kyf#Oeq4hp3O&GcYbG3eg(gxW zO2R_u&GK!vGBxVglpcZz9d?u_VJ;=+yQiy>6=ND{#O$P9-Sv5HwO+a zIFKwX#C@#VK;@)BShHf9Q!^*RhGiv{msH-!CXiTDJN)_TO;*ZEBMqu3@UvexSUK}` zhX!dOa`&!P7O-bRn{2GX@AQ2OIul*c8@H4OaAwCHv8IOpDYRWA_c$yoGc~B9(96U( zD$iXGYx#E#(n9ECT0|-LxB9&*W6wrkV`E9!>`^F&b-0fs3b4?U> z>b`xHBaR^Kkz`>Z>vv=I5qwb9Cc1o)l)=tyUx3wpaw(g4n^p0K8d{@0q6{cz|@HK?M{ zJ4CMAheJ=>nVzr^`oa|Jtt=U?+l3xN7xZQF0PIzl5q;gYyZfgoK8pf7&PN(lQRwrW zj!nX$H|;7Vq9iPYp13WOJ1;KK>ER}N2wl)0Ndp+zQXF$rJH1p=t?9Va%XO2bZBa#` z=efRSJq~@225BL5pAV&bDd#Z!_kU^k1PPYD;m8n`g?p(9`17o@DLP0)Az@Y_ikcEY~pBx*j96$;?&>TIeG>EWa2kI-8H#)Pj!kXIQ56kDt%5lT8vRQ*F z3cOi{Mpn*z&DT@L!;-KN`NU#QR?h8i+lwAT7xc~200uTswnhn zIXrH2m&1pkuDxYMEC~yt=c_$dIm-e5ke2kJhtLK6lQe*lEsZxfHFW3uW4htcEBngQ zwy2`eJ9t&>fJ2YgAT5M`@5Nu$@ml_{FFkuAbXdzP_mc&1^E9!K8hNZ!h2FgTgtdG$ zSrY}m@%2tC4?|DM!b0568z0={xM3|X=})CWgbizXh*aL-%H~#BQ{#TQ@5L0{;jtQ2 zQQ)PD^lFYfylj6N4@<&AQtni7lLn$uRjTM-CmH zYJee{vs2)vm*kNQ% z6n5lzyTiD%pOb}!unWXqu`&)XJ%mbw2piV!5mI@BJ9`VLT zz0}Z5P5?Ca4UOl`VOUs&;|XNG=Q-pJCU9>HFW0`;q`d+hS1Xw zlcjA@MWHVo67vR!K1hSK5c-wjg}dM#=9Un8_PU^_A1(`EV8>UQn;Lp1hYiJW=*=~# zqR`jvonhs|i#-~oh0seppJrto{&@tQi7x2PM@j>*O&|tzBgnBR&y#q0nX5q+gBs-x*lbU~jd55R8zGoZKs^|8G(3x|G3gDMLB?XktmVPLSP z*9(;rQ4$tHe>Wz-GhSZCh0#M0p~La`U1;g)RjEl^{umT*e0?j3fpni zr3yIglVo8b?CMoZT3N4>Vgi*05jGqmc96;&&8MZK$E6QwvLFU8Cy^RdQQ(*APpZLz z!*Y^-qKt(BBszo5TU~w{!|)(O^u_o<0F<2%g&L*8txD-OPXt2*0l}y68h&XiHFI6C%S@BWyBJLDqxSdq)^iha*nP>GDD2cV_b7WP zA?!G^un=~5{gYNY%q-KXG>EWa4eu$HH?kdw=BCCSw<^NQJ@XqhsG`6}T(cd;-3@Da z?in&3mV|}K=gsb;tjB`iX{(v^5W1jmlm;+3v)yg1si9wd{@@AsIDE{1uR#@so)ns- zloSZP^DG$=OTt3v(_($P@uC8uFPcpcp$qy4c>s1otN}eGHYDl(N*uc99BJB$Dhj=S za@TUW(^qPc7D6vw&}$S9{lQ#%_C)BggBy07qsW>l@WWn} zw{hUmf{)3kWjrhi3z0uCbXCq=gWqY-Mf4E5pdXS4V6T%6=qu%e&-3rV%Sx)n z(zF#-6uO;P^+7oF{u-o(&{sLxSa}HAxP+cP5jrd@sh7$Eu)SC4_A9r@2439fu&gvB zYofqM*O}3c1BVveLKYU{{+#;Y3|_@yS$VaLN`nYHy)xO9&E{Fn+L0-2_~8>y{KN@t zH+gpO*ZThX*ZB1_{IB!l5sJ*u^pKq(W#gF2#-_OP*KKT~!WLqG{m + +HELPERS_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) + +src_backup_filename="$1" +src_backup_path="$HELPERS_DIR/../backups/${src_backup_filename}" + +backups_disk_root=$($CLICKHOUSE_CLIENT --query "SELECT path FROM system.disks WHERE name='backups'") + +if [ -z "${backups_disk_root}" ]; then + echo "Disk 'backups' not found" + exit 1 +fi + +dest_relative_path=${CLICKHOUSE_DATABASE}/${src_backup_filename} +dest_path=${backups_disk_root}/${dest_relative_path} + +mkdir -p "$(dirname "${dest_path}")" +ln -s "${src_backup_path}" "${dest_path}" + +echo "${dest_relative_path}" From d421636a5fdc3b73fa5cb05e83529483d69e75e2 Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Wed, 31 Jul 2024 09:26:09 +0200 Subject: [PATCH 486/661] Protect temporary part directories from removing during RESTORE. 
---
 src/Storages/MergeTree/MergeTreeData.cpp | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp
index 2e10f5a0227..ce27ad24e10 100644
--- a/src/Storages/MergeTree/MergeTreeData.cpp
+++ b/src/Storages/MergeTree/MergeTreeData.cpp
@@ -5557,12 +5557,16 @@ public:
         auto it = temp_part_dirs.find(part_name);
         if (it == temp_part_dirs.end())
         {
-            auto temp_part_dir = std::make_shared<TemporaryFileOnDisk>(disk, fs::path{storage->getRelativeDataPath()} / ("tmp_restore_" + part_name + "-"));
+            auto temp_dir_deleter = std::make_unique<TemporaryFileOnDisk>(disk, fs::path{storage->getRelativeDataPath()} / ("tmp_restore_" + part_name + "-"));
+            auto temp_part_dir = fs::path{temp_dir_deleter->getRelativePath()}.filename();
             /// Attaching parts will rename them so it's expected for a temporary part directory not to exist anymore in the end.
-            temp_part_dir->setShowWarningIfRemoved(false);
-            it = temp_part_dirs.emplace(part_name, temp_part_dir).first;
+            temp_dir_deleter->setShowWarningIfRemoved(false);
+            /// The following holder is needed to prevent clearOldTemporaryDirectories() from clearing `temp_part_dir` before we attach the part.
+            auto temp_dir_holder = storage->getTemporaryPartDirectoryHolder(temp_part_dir);
+            it = temp_part_dirs.emplace(part_name,
+                std::make_pair(std::move(temp_dir_deleter), std::move(temp_dir_holder))).first;
         }
-        return it->second->getRelativePath();
+        return it->second.first->getRelativePath();
     }
 
 private:
@@ -5588,7 +5592,7 @@ private:
     size_t num_parts = 0;
     size_t num_broken_parts = 0;
     MutableDataPartsVector parts;
-    std::map<String, std::shared_ptr<TemporaryFileOnDisk>> temp_part_dirs;
+    std::map<String, std::pair<std::unique_ptr<TemporaryFileOnDisk>, scope_guard>> temp_part_dirs;
     mutable std::mutex mutex;
 };
 

From 4e2f8576e5a6e8e39a16334d0c697d5cb09e0469 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?=
Date: Wed, 31 Jul 2024 11:50:58 +0200
Subject: [PATCH 487/661] Revert "Add settings to replace external engines to Null during create"

---
 docs/en/operations/settings/settings.md       |  12 -
 src/Core/Settings.h                           |   2 -
 src/Core/SettingsChangesHistory.cpp           |   4 +-
 src/Interpreters/InterpreterCreateQuery.cpp   |  35 ---
 .../test_restore_external_engines/__init__.py |   0
 .../configs/backups_disk.xml                  |  14 --
 .../configs/remote_servers.xml                |  21 --
 .../test_restore_external_engines/test.py     | 218 ------------------
 8 files changed, 1 insertion(+), 305 deletions(-)
 delete mode 100644 tests/integration/test_restore_external_engines/__init__.py
 delete mode 100644 tests/integration/test_restore_external_engines/configs/backups_disk.xml
 delete mode 100644 tests/integration/test_restore_external_engines/configs/remote_servers.xml
 delete mode 100644 tests/integration/test_restore_external_engines/test.py

diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md
index 8739414464e..c3f697c3bdc 100644
--- a/docs/en/operations/settings/settings.md
+++ b/docs/en/operations/settings/settings.md
@@ -5608,15 +5608,3 @@ Default value: `10000000`.
 Minimal size of block to compress in CROSS JOIN. Zero value means - disable this threshold. This block is compressed when any of the two thresholds (by rows or by bytes) are reached.
 
 Default value: `1GiB`.
-
-## restore_replace_external_engines_to_null
-
-For testing purposes. Replaces all external engines to Null to not initiate external connections.
-
-Default value: `False`
-
-## restore_replace_external_table_functions_to_null
-
-For testing purposes. Replaces all external table functions to Null to not initiate external connections.
- -Default value: `False` \ No newline at end of file diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 27b71558bd3..4fc2034b855 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -893,8 +893,6 @@ class IColumn; M(Bool, optimize_distinct_in_order, true, "Enable DISTINCT optimization if some columns in DISTINCT form a prefix of sorting. For example, prefix of sorting key in merge tree or ORDER BY statement", 0) \ M(Bool, keeper_map_strict_mode, false, "Enforce additional checks during operations on KeeperMap. E.g. throw an exception on an insert for already existing key", 0) \ M(UInt64, extract_key_value_pairs_max_pairs_per_row, 1000, "Max number of pairs that can be produced by the `extractKeyValuePairs` function. Used as a safeguard against consuming too much memory.", 0) ALIAS(extract_kvp_max_pairs_per_row) \ - M(Bool, restore_replace_external_engines_to_null, false, "Replace all the external table engines to Null on restore. Useful for testing purposes", 0) \ - M(Bool, restore_replace_external_table_functions_to_null, false, "Replace all table functions to Null on restore. Useful for testing purposes", 0) \ \ \ /* ###################################### */ \ diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index 8bea0b1eed3..9faf77e9087 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -80,9 +80,7 @@ static std::initializer_listno_empty_args = true; storage.set(storage.engine, engine_ast); } - - void setNullTableEngine(ASTStorage & storage) - { - auto engine_ast = std::make_shared(); - engine_ast->name = "Null"; - engine_ast->no_empty_args = true; - storage.set(storage.engine, engine_ast); - } - } void InterpreterCreateQuery::setEngine(ASTCreateQuery & create) const { if (create.as_table_function) - { - if (getContext()->getSettingsRef().restore_replace_external_table_functions_to_null) - { - const auto & factory = TableFunctionFactory::instance(); - - auto properties = factory.tryGetProperties(create.as_table_function->as()->name); - if (properties && properties->allow_readonly) - return; - if (!create.storage) - { - auto storage_ast = std::make_shared(); - create.set(create.storage, storage_ast); - } - else - throw Exception(ErrorCodes::LOGICAL_ERROR, "Storage should not be created yet, it's a bug."); - create.as_table_function = nullptr; - setNullTableEngine(*create.storage); - } return; - } if (create.is_dictionary || create.is_ordinary_view || create.is_live_view || create.is_window_view) return; @@ -1043,13 +1015,6 @@ void InterpreterCreateQuery::setEngine(ASTCreateQuery & create) const /// Some part of storage definition (such as PARTITION BY) is specified, but ENGINE is not: just set default one. setDefaultTableEngine(*create.storage, getContext()->getSettingsRef().default_table_engine.value); } - /// For external tables with restore_replace_external_engine_to_null setting we replace external engines to - /// Null table engine. 
- else if (getContext()->getSettingsRef().restore_replace_external_engines_to_null) - { - if (StorageFactory::instance().getStorageFeatures(create.storage->engine->name).source_access_type != AccessType::NONE) - setNullTableEngine(*create.storage); - } return; } diff --git a/tests/integration/test_restore_external_engines/__init__.py b/tests/integration/test_restore_external_engines/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/tests/integration/test_restore_external_engines/configs/backups_disk.xml b/tests/integration/test_restore_external_engines/configs/backups_disk.xml deleted file mode 100644 index f7d666c6542..00000000000 --- a/tests/integration/test_restore_external_engines/configs/backups_disk.xml +++ /dev/null @@ -1,14 +0,0 @@ - - - - - local - /backups/ - - - - - backups - /backups/ - - diff --git a/tests/integration/test_restore_external_engines/configs/remote_servers.xml b/tests/integration/test_restore_external_engines/configs/remote_servers.xml deleted file mode 100644 index 76ad3618339..00000000000 --- a/tests/integration/test_restore_external_engines/configs/remote_servers.xml +++ /dev/null @@ -1,21 +0,0 @@ - - - - - true - - replica1 - 9000 - - - replica2 - 9000 - - - replica3 - 9000 - - - - - diff --git a/tests/integration/test_restore_external_engines/test.py b/tests/integration/test_restore_external_engines/test.py deleted file mode 100644 index cf189f2a6ed..00000000000 --- a/tests/integration/test_restore_external_engines/test.py +++ /dev/null @@ -1,218 +0,0 @@ -import pytest - -import pymysql.cursors -import pytest -from helpers.cluster import ClickHouseCluster - -cluster = ClickHouseCluster(__file__) -configs = ["configs/remote_servers.xml", "configs/backups_disk.xml"] - -node1 = cluster.add_instance( - "replica1", - with_zookeeper=True, - with_mysql8=True, - main_configs=configs, - external_dirs=["/backups/"], -) -node2 = cluster.add_instance( - "replica2", - with_zookeeper=True, - with_mysql8=True, - main_configs=configs, - external_dirs=["/backups/"], -) -node3 = cluster.add_instance( - "replica3", - with_zookeeper=True, - with_mysql8=True, - main_configs=configs, - external_dirs=["/backups/"], -) -nodes = [node1, node2, node3] - -backup_id_counter = 0 - - -def new_backup_name(): - global backup_id_counter - backup_id_counter += 1 - return f"Disk('backups', '{backup_id_counter}/')" - - -def cleanup_nodes(nodes, dbname): - for node in nodes: - node.query(f"DROP DATABASE IF EXISTS {dbname} SYNC") - - -def fill_nodes(nodes, dbname): - cleanup_nodes(nodes, dbname) - for node in nodes: - node.query( - f"CREATE DATABASE {dbname} ENGINE = Replicated('/clickhouse/databases/{dbname}', 'default', '{node.name}')" - ) - - -def drop_mysql_table(conn, tableName): - with conn.cursor() as cursor: - cursor.execute(f"DROP TABLE IF EXISTS `clickhouse`.`{tableName}`") - - -def get_mysql_conn(cluster): - conn = pymysql.connect( - user="root", - password="clickhouse", - host=cluster.mysql8_ip, - port=cluster.mysql8_port, - ) - return conn - - -def fill_tables(cluster, dbname): - fill_nodes(nodes, dbname) - - conn = get_mysql_conn(cluster) - - with conn.cursor() as cursor: - cursor.execute("DROP DATABASE IF EXISTS clickhouse") - cursor.execute("CREATE DATABASE clickhouse") - cursor.execute("DROP TABLE IF EXISTS clickhouse.inference_table") - cursor.execute( - "CREATE TABLE clickhouse.inference_table (id INT PRIMARY KEY, data BINARY(16) NOT NULL)" - ) - cursor.execute( - "INSERT INTO clickhouse.inference_table VALUES (100, X'9fad5e9eefdfb449')" - ) - 
conn.commit() - - parameters = "'mysql80:3306', 'clickhouse', 'inference_table', 'root', 'clickhouse'" - - node1.query( - f"CREATE TABLE {dbname}.mysql_schema_inference_engine ENGINE=MySQL({parameters})" - ) - node1.query( - f"CREATE TABLE {dbname}.mysql_schema_inference_function AS mysql({parameters})" - ) - - node1.query(f"CREATE TABLE {dbname}.merge_tree (id UInt64, b String) ORDER BY id") - node1.query(f"INSERT INTO {dbname}.merge_tree VALUES (100, 'abc')") - - expected = "id\tInt32\t\t\t\t\t\ndata\tFixedString(16)\t\t\t\t\t\n" - assert ( - node1.query(f"DESCRIBE TABLE {dbname}.mysql_schema_inference_engine") - == expected - ) - assert ( - node1.query(f"DESCRIBE TABLE {dbname}.mysql_schema_inference_function") - == expected - ) - assert node1.query(f"SELECT id FROM mysql({parameters})") == "100\n" - assert ( - node1.query(f"SELECT id FROM {dbname}.mysql_schema_inference_engine") == "100\n" - ) - assert ( - node1.query(f"SELECT id FROM {dbname}.mysql_schema_inference_function") - == "100\n" - ) - assert node1.query(f"SELECT id FROM {dbname}.merge_tree") == "100\n" - - -@pytest.fixture(scope="module") -def start_cluster(): - try: - cluster.start() - yield cluster - - except Exception as ex: - print(ex) - - finally: - cluster.shutdown() - - -def test_restore_table(start_cluster): - fill_tables(cluster, "replicated") - backup_name = new_backup_name() - node2.query(f"SYSTEM SYNC DATABASE REPLICA replicated") - - node2.query(f"BACKUP DATABASE replicated TO {backup_name}") - - node2.query("DROP TABLE replicated.mysql_schema_inference_engine") - node2.query("DROP TABLE replicated.mysql_schema_inference_function") - - node3.query(f"SYSTEM SYNC DATABASE REPLICA replicated") - - assert node3.query("EXISTS replicated.mysql_schema_inference_engine") == "0\n" - assert node3.query("EXISTS replicated.mysql_schema_inference_function") == "0\n" - - node3.query( - f"RESTORE DATABASE replicated FROM {backup_name} SETTINGS allow_different_database_def=true" - ) - node1.query(f"SYSTEM SYNC DATABASE REPLICA replicated") - - assert ( - node1.query( - "SELECT count(), sum(id) FROM replicated.mysql_schema_inference_engine" - ) - == "1\t100\n" - ) - assert ( - node1.query( - "SELECT count(), sum(id) FROM replicated.mysql_schema_inference_function" - ) - == "1\t100\n" - ) - assert ( - node1.query("SELECT count(), sum(id) FROM replicated.merge_tree") == "1\t100\n" - ) - cleanup_nodes(nodes, "replicated") - - -def test_restore_table_null(start_cluster): - fill_tables(cluster, "replicated2") - - backup_name = new_backup_name() - node2.query(f"SYSTEM SYNC DATABASE REPLICA replicated2") - - node2.query(f"BACKUP DATABASE replicated2 TO {backup_name}") - - node2.query("DROP TABLE replicated2.mysql_schema_inference_engine") - node2.query("DROP TABLE replicated2.mysql_schema_inference_function") - - node3.query(f"SYSTEM SYNC DATABASE REPLICA replicated2") - - assert node3.query("EXISTS replicated2.mysql_schema_inference_engine") == "0\n" - assert node3.query("EXISTS replicated2.mysql_schema_inference_function") == "0\n" - - node3.query( - f"RESTORE DATABASE replicated2 FROM {backup_name} SETTINGS allow_different_database_def=1, allow_different_table_def=1 SETTINGS restore_replace_external_engines_to_null=1, restore_replace_external_table_functions_to_null=1" - ) - node1.query(f"SYSTEM SYNC DATABASE REPLICA replicated2") - - assert ( - node1.query( - "SELECT count(), sum(id) FROM replicated2.mysql_schema_inference_engine" - ) - == "0\t0\n" - ) - assert ( - node1.query( - "SELECT count(), sum(id) FROM 
replicated2.mysql_schema_inference_function" - ) - == "0\t0\n" - ) - assert ( - node1.query("SELECT count(), sum(id) FROM replicated2.merge_tree") == "1\t100\n" - ) - assert ( - node1.query( - "SELECT engine FROM system.tables where database = 'replicated2' and name like '%mysql%'" - ) - == "Null\nNull\n" - ) - assert ( - node1.query( - "SELECT engine FROM system.tables where database = 'replicated2' and name like '%merge_tree%'" - ) - == "MergeTree\n" - ) - cleanup_nodes(nodes, "replicated2") From 06863cf4157765c04759109afa756022dc5e9c55 Mon Sep 17 00:00:00 2001 From: maxvostrikov Date: Wed, 31 Jul 2024 12:12:30 +0200 Subject: [PATCH 488/661] fix for allow_experimental_analyzer --- tests/queries/0_stateless/00309_formats.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/00309_formats.sql b/tests/queries/0_stateless/00309_formats.sql index 0366cdeea5c..b784907be08 100644 --- a/tests/queries/0_stateless/00309_formats.sql +++ b/tests/queries/0_stateless/00309_formats.sql @@ -12,5 +12,5 @@ SELECT number * 246 + 10 AS n, toDate('2000-01-01') + n AS d, range(n) AS arr, a SET enable_named_columns_in_function_tuple = 1; -SELECT 36 AS n, toDate('2000-01-01') + n AS d, (n, d) AS tuple FROM system.numbers LIMIT 1 FORMAT RowBinaryWithNamesAndTypes; -SELECT number * 246 + 10 AS n, toDate('2000-01-01') + n AS d, (n, d) AS tuple FROM system.numbers LIMIT 1 FORMAT TabSeparatedWithNamesAndTypes; +SELECT 36 AS n, toDate('2000-01-01') + n AS d, (n, d) AS tuple FROM system.numbers LIMIT 1 FORMAT RowBinaryWithNamesAndTypes SETTINGS allow_experimental_analyzer=1; +SELECT number * 246 + 10 AS n, toDate('2000-01-01') + n AS d, (n, d) AS tuple FROM system.numbers LIMIT 1 FORMAT TabSeparatedWithNamesAndTypes SETTINGS allow_experimental_analyzer=1; From debcc2e61053f763cb84e34e48275dbebd5bd544 Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Wed, 31 Jul 2024 10:46:19 +0200 Subject: [PATCH 489/661] Fix test test_mutation --- tests/integration/test_backup_restore_on_cluster/test.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/integration/test_backup_restore_on_cluster/test.py b/tests/integration/test_backup_restore_on_cluster/test.py index 1b7f4aaa97d..d20e10e8a04 100644 --- a/tests/integration/test_backup_restore_on_cluster/test.py +++ b/tests/integration/test_backup_restore_on_cluster/test.py @@ -1054,9 +1054,12 @@ def test_mutation(): backup_name = new_backup_name() node1.query(f"BACKUP TABLE tbl ON CLUSTER 'cluster' TO {backup_name}") - assert not has_mutation_in_backup("0000000000", backup_name, "default", "tbl") + # mutation #0000000000: "UPDATE x=x+1 WHERE 1" could already finish before starting the backup + # mutation #0000000001: "UPDATE x=x+1+sleep(3) WHERE 1" assert has_mutation_in_backup("0000000001", backup_name, "default", "tbl") + # mutation #0000000002: "UPDATE x=x+1+sleep(3) WHERE 1" assert has_mutation_in_backup("0000000002", backup_name, "default", "tbl") + # mutation #0000000003: not expected assert not has_mutation_in_backup("0000000003", backup_name, "default", "tbl") node1.query("DROP TABLE tbl ON CLUSTER 'cluster' SYNC") From f9f17fb61e2ab27f90434b5e3fc9081c061eaae4 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Wed, 31 Jul 2024 12:32:17 +0200 Subject: [PATCH 490/661] Fix reference --- .../0_stateless/03215_parsing_archive_name_s3.reference | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/queries/0_stateless/03215_parsing_archive_name_s3.reference 
b/tests/queries/0_stateless/03215_parsing_archive_name_s3.reference index b4804c82dc2..b27524812c7 100644 --- a/tests/queries/0_stateless/03215_parsing_archive_name_s3.reference +++ b/tests/queries/0_stateless/03215_parsing_archive_name_s3.reference @@ -1,3 +1,3 @@ -::03215_archive.csv test/::03215_archive.csv -test::03215_archive.csv test/test::03215_archive.csv -test.zip::03215_archive.csv test/test.zip::03215_archive.csv +::03215_archive.csv test/::03215_archive.csv +test::03215_archive.csv test/test::03215_archive.csv +test.zip::03215_archive.csv test/test.zip::03215_archive.csv From c81d3322b18b0eb4b45b91ac019a8c4f42d7518d Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Wed, 31 Jul 2024 13:39:30 +0200 Subject: [PATCH 491/661] Update 02150_index_hypothesis_race_long.sh --- tests/queries/0_stateless/02150_index_hypothesis_race_long.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02150_index_hypothesis_race_long.sh b/tests/queries/0_stateless/02150_index_hypothesis_race_long.sh index c29b604d23d..5c432350768 100755 --- a/tests/queries/0_stateless/02150_index_hypothesis_race_long.sh +++ b/tests/queries/0_stateless/02150_index_hypothesis_race_long.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, no-distributed-cache +# Tags: long, no-random-settings, no-distributed-cache CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh From d5f8540fbbcc7e5f8014212d67804412c7466d7d Mon Sep 17 00:00:00 2001 From: divanik Date: Wed, 31 Jul 2024 12:00:09 +0000 Subject: [PATCH 492/661] Fix --- tests/integration/helpers/cluster.py | 25 +++++++++++++------- tests/integration/helpers/retry_decorator.py | 7 ++++-- 2 files changed, 21 insertions(+), 11 deletions(-) diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index 7d80fbe90f8..7f0a9154be9 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -2692,11 +2692,13 @@ class ClickHouseCluster: images_pull_cmd = self.base_cmd + ["pull"] # sometimes dockerhub/proxy can be flaky - retry( - log_function=lambda exception: logging.info( - "Got exception pulling images: %s", exception - ), - )(run_and_check)(images_pull_cmd) + def logging_pulling_images(**kwargs): + if "exception" in kwargs: + logging.info( + "Got exception pulling images: %s", kwargs["exception"] + ) + + retry(log_function=logging_pulling_images)(run_and_check)(images_pull_cmd) if self.with_zookeeper_secure and self.base_zookeeper_cmd: logging.debug("Setup ZooKeeper Secure") @@ -2969,11 +2971,16 @@ class ClickHouseCluster: "Trying to create Azurite instance by command %s", " ".join(map(str, azurite_start_cmd)), ) - retry( - log_function=lambda exception: logging.info( + def logging_azurite_initialization(exception, retry_number, sleep_time): + logging.info( f"Azurite initialization failed with error: {exception}" - ), - )(run_and_check)(azurite_start_cmd) + ) + + retry( + log_function=logging_azurite_initialization, + )( + run_and_check + )(azurite_start_cmd) self.up_called = True logging.info("Trying to connect to Azurite") self.wait_azurite_to_start() diff --git a/tests/integration/helpers/retry_decorator.py b/tests/integration/helpers/retry_decorator.py index aaa040464c2..e7bafbe29c1 100644 --- a/tests/integration/helpers/retry_decorator.py +++ b/tests/integration/helpers/retry_decorator.py @@ -8,7 +8,7 @@ def retry( delay: float = 1, backoff: float = 1.5, jitter: float = 2, - 
log_function=lambda *args, **kwargs: None, + log_function=None, # should take **kwargs or arguments: `retry_number`, `exception` and `sleep_time` retriable_expections_list: List[Type[BaseException]] = [Exception], ): def inner(func): @@ -26,8 +26,11 @@ def retry( break if not should_retry or (retry == retries - 1): raise e - log_function(retry=retry, exception=e) sleep_time = current_delay + random.uniform(0, jitter) + if log_function is not None: + log_function( + retry_number=retry, exception=e, sleep_time=sleep_time + ) time.sleep(sleep_time) current_delay *= backoff From 15e0033016eb0e23a7e6f512d5096e50863e3187 Mon Sep 17 00:00:00 2001 From: Pablo Marcos Date: Wed, 31 Jul 2024 12:09:49 +0000 Subject: [PATCH 493/661] Bring back the strict check Also update the doc. --- tests/integration/README.md | 9 +++++---- tests/integration/test_storage_s3_queue/test.py | 4 ++-- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/tests/integration/README.md b/tests/integration/README.md index ab984b7bd04..a8deb97b526 100644 --- a/tests/integration/README.md +++ b/tests/integration/README.md @@ -124,10 +124,11 @@ You can just open shell inside a container by overwritting the command: ### Parallel test execution On the CI, we run a number of parallel runners (5 at the time of this writing), each on its own -Docker container. These runner containers spawn more containers for the services needed such as -ZooKeeper, MySQL, PostgreSQL and minio, among others. Within each runner, tests are parallelized -using [pytest-xdist](https://pytest-xdist.readthedocs.io/en/stable/). We're using `--dist=loadfile` -to [distribute the load](https://pytest-xdist.readthedocs.io/en/stable/distribution.html). In the +Docker container. These runner containers spawn more containers for each test for the services +needed such as ZooKeeper, MySQL, PostgreSQL and minio, among others. This means that tests do not +share any services among them. Within each runner, tests are parallelized using +[pytest-xdist](https://pytest-xdist.readthedocs.io/en/stable/). We're using `--dist=loadfile` to +[distribute the load](https://pytest-xdist.readthedocs.io/en/stable/distribution.html). In the documentation words: this guarantees that all tests in a file run in the same worker. This means that any test within the same file will never execute their tests in parallel. They'll be executed on the same worker one after the other. 
diff --git a/tests/integration/test_storage_s3_queue/test.py b/tests/integration/test_storage_s3_queue/test.py
index e3445d14cdb..9a97e8c23d1 100644
--- a/tests/integration/test_storage_s3_queue/test.py
+++ b/tests/integration/test_storage_s3_queue/test.py
@@ -907,7 +907,7 @@ def test_max_set_age(started_cluster):
     file_with_error = f"max_set_age_fail_{uuid4().hex[:8]}.csv"
     put_s3_file_content(started_cluster, f"{files_path}/{file_with_error}", values_csv)

-    wait_for_condition(lambda: failed_count + 1 <= get_object_storage_failures())
+    wait_for_condition(lambda: failed_count + 1 == get_object_storage_failures())

     node.query("SYSTEM FLUSH LOGS")
     assert "Cannot parse input" in node.query(
@@ -920,7 +920,7 @@ def test_max_set_age(started_cluster):
         )
     )

-    wait_for_condition(lambda: failed_count + 2 <= get_object_storage_failures())
+    wait_for_condition(lambda: failed_count + 2 == get_object_storage_failures())

     node.query("SYSTEM FLUSH LOGS")
     assert "Cannot parse input" in node.query(

From 9d14053cfe7867fd688c08b493c264ee679a4a61 Mon Sep 17 00:00:00 2001
From: avogar
Date: Wed, 31 Jul 2024 12:17:56 +0000
Subject: [PATCH 494/661] Proper fix for short circuit execution with nested dictGetOrDefault
---
 src/Columns/ColumnFunction.cpp                | 26 ++++++++++++++-----
 src/Interpreters/ExpressionActions.cpp        |  4 ---
 ...sted_short_circuit_functions_bug.reference |  2 ++
 ...210_nested_short_circuit_functions_bug.sql |  3 +++
 4 files changed, 24 insertions(+), 11 deletions(-)
 create mode 100644 tests/queries/0_stateless/03210_nested_short_circuit_functions_bug.reference
 create mode 100644 tests/queries/0_stateless/03210_nested_short_circuit_functions_bug.sql

diff --git a/src/Columns/ColumnFunction.cpp b/src/Columns/ColumnFunction.cpp
index fc81efaac0c..18c343c6ca6 100644
--- a/src/Columns/ColumnFunction.cpp
+++ b/src/Columns/ColumnFunction.cpp
@@ -296,16 +296,28 @@ ColumnWithTypeAndName ColumnFunction::reduce() const
             function->getName(), toString(args), toString(captured));

     ColumnsWithTypeAndName columns = captured_columns;
-    IFunction::ShortCircuitSettings settings;
     /// Arguments of lazy executed function can also be lazy executed.
-    /// But we shouldn't execute arguments if this function is short circuit,
-    /// because it will handle lazy executed arguments by itself.
-    if (is_short_circuit_argument && !function->isShortCircuit(settings, args))
+    if (is_short_circuit_argument)
     {
-        for (auto & col : columns)
+        IFunction::ShortCircuitSettings settings;
+        /// We shouldn't execute all arguments if this function is short circuit,
+        /// because it will handle lazy executed arguments by itself.
+        /// Execute only arguments with disabled lazy execution.
+ if (function->isShortCircuit(settings, args)) { - if (const ColumnFunction * arg = checkAndGetShortCircuitArgument(col.column)) - col = arg->reduce(); + for (size_t i : settings.arguments_with_disabled_lazy_execution) + { + if (const ColumnFunction * arg = checkAndGetShortCircuitArgument(columns[i].column)) + columns[i] = arg->reduce(); + } + } + else + { + for (auto & col : columns) + { + if (const ColumnFunction * arg = checkAndGetShortCircuitArgument(col.column)) + col = arg->reduce(); + } } } diff --git a/src/Interpreters/ExpressionActions.cpp b/src/Interpreters/ExpressionActions.cpp index d832f568cb8..8993830af14 100644 --- a/src/Interpreters/ExpressionActions.cpp +++ b/src/Interpreters/ExpressionActions.cpp @@ -195,10 +195,6 @@ static void setLazyExecutionInfo( } lazy_execution_info.short_circuit_ancestors_info[parent].insert(indexes.begin(), indexes.end()); - /// After checking arguments_with_disabled_lazy_execution, if there is no relation with parent, - /// disable the current node. - if (indexes.empty()) - lazy_execution_info.can_be_lazy_executed = false; } else /// If lazy execution is disabled for one of parents, we should disable it for current node. diff --git a/tests/queries/0_stateless/03210_nested_short_circuit_functions_bug.reference b/tests/queries/0_stateless/03210_nested_short_circuit_functions_bug.reference new file mode 100644 index 00000000000..aa47d0d46d4 --- /dev/null +++ b/tests/queries/0_stateless/03210_nested_short_circuit_functions_bug.reference @@ -0,0 +1,2 @@ +0 +0 diff --git a/tests/queries/0_stateless/03210_nested_short_circuit_functions_bug.sql b/tests/queries/0_stateless/03210_nested_short_circuit_functions_bug.sql new file mode 100644 index 00000000000..923f1e3be1f --- /dev/null +++ b/tests/queries/0_stateless/03210_nested_short_circuit_functions_bug.sql @@ -0,0 +1,3 @@ +select if(equals(materialize('abc'), 'aws.lambda.duration'), if(toFloat64(materialize('x86_74')) < 50.0000, 0, 1), 0) settings short_circuit_function_evaluation='enable'; +select if(equals(materialize('abc'), 'aws.lambda.duration'), if(toFloat64(materialize('x86_74')) < 50.0000, 0, 1), 0) settings short_circuit_function_evaluation='force_enable'; + From 6b7c5eb5da1be1fc31d4ebfd4f0dfa0c6a6e728c Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Wed, 31 Jul 2024 14:09:07 +0200 Subject: [PATCH 495/661] Fix drop --- src/Storages/StorageKeeperMap.cpp | 34 +++++++++++++++-------- src/Storages/StorageKeeperMap.h | 21 ++++++++++---- tests/integration/test_keeper_map/test.py | 5 ++-- 3 files changed, 41 insertions(+), 19 deletions(-) diff --git a/src/Storages/StorageKeeperMap.cpp b/src/Storages/StorageKeeperMap.cpp index 1559b442e43..0634c7be6ee 100644 --- a/src/Storages/StorageKeeperMap.cpp +++ b/src/Storages/StorageKeeperMap.cpp @@ -79,6 +79,7 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; extern const int LIMIT_EXCEEDED; extern const int CANNOT_RESTORE_TABLE; + extern const int INVALID_STATE; } namespace @@ -497,7 +498,7 @@ StorageKeeperMap::StorageKeeperMap( } - table_is_valid = true; + table_status = TableStatus::VALID; /// we are the first table created for the specified Keeper path, i.e. we are the first replica return; } @@ -656,7 +657,18 @@ bool StorageKeeperMap::dropTable(zkutil::ZooKeeperPtr zookeeper, const zkutil::E void StorageKeeperMap::drop() { - checkTable(); + auto current_table_status = getTableStatus(); + if (current_table_status == TableStatus::UNKNOWN) + { + static constexpr auto error_msg = "Failed to activate table because of connection issues. 
It will be activated " + "once a connection is established and metadata is verified"; + throw Exception(ErrorCodes::INVALID_STATE, error_msg); + } + + /// if only column metadata is wrong we can still drop the table correctly + if (current_table_status == TableStatus::INVALID_KEEPER_STRUCTURE) + return; + auto client = getClient(); // we allow ZNONODE in case we got hardware error on previous drop @@ -1017,11 +1029,11 @@ UInt64 StorageKeeperMap::keysLimit() const return keys_limit; } -std::optional StorageKeeperMap::isTableValid() const +StorageKeeperMap::TableStatus StorageKeeperMap::getTableStatus() const { std::lock_guard lock{init_mutex}; - if (table_is_valid.has_value()) - return table_is_valid; + if (table_status != TableStatus::UNKNOWN) + return table_status; [&] { @@ -1034,7 +1046,7 @@ std::optional StorageKeeperMap::isTableValid() const if (metadata_stat.numChildren == 0) { - table_is_valid = false; + table_status = TableStatus::INVALID_KEEPER_STRUCTURE; return; } @@ -1045,7 +1057,7 @@ std::optional StorageKeeperMap::isTableValid() const "Table definition does not match to the one stored in the path {}. Stored definition: {}", zk_root_path, stored_metadata_string); - table_is_valid = false; + table_status = TableStatus::INVALID_METADATA; return; } @@ -1058,7 +1070,7 @@ std::optional StorageKeeperMap::isTableValid() const Coordination::Responses responses; client->tryMulti(requests, responses); - table_is_valid = false; + table_status = TableStatus::INVALID_KEEPER_STRUCTURE; if (responses[0]->error != Coordination::Error::ZOK) { LOG_ERROR(log, "Table node ({}) is missing", zk_table_path); @@ -1077,18 +1089,18 @@ std::optional StorageKeeperMap::isTableValid() const return; } - table_is_valid = true; + table_status = TableStatus::VALID; } catch (const Coordination::Exception & e) { tryLogCurrentException(log); if (!Coordination::isHardwareError(e.code)) - table_is_valid = false; + table_status = TableStatus::INVALID_KEEPER_STRUCTURE; } }(); - return table_is_valid; + return table_status; } Chunk StorageKeeperMap::getByKeys(const ColumnsWithTypeAndName & keys, PaddedPODArray & null_map, const Names &) const diff --git a/src/Storages/StorageKeeperMap.h b/src/Storages/StorageKeeperMap.h index cfbb35ab2fe..8ed348a4f6f 100644 --- a/src/Storages/StorageKeeperMap.h +++ b/src/Storages/StorageKeeperMap.h @@ -80,8 +80,8 @@ public: template void checkTable() const { - auto is_table_valid = isTableValid(); - if (!is_table_valid.has_value()) + auto current_table_status = getTableStatus(); + if (table_status == TableStatus::UNKNOWN) { static constexpr auto error_msg = "Failed to activate table because of connection issues. It will be activated " "once a connection is established and metadata is verified"; @@ -94,10 +94,10 @@ public: } } - if (!*is_table_valid) + if (current_table_status != TableStatus::VALID) { static constexpr auto error_msg - = "Failed to activate table because of invalid metadata in ZooKeeper. Please DETACH table"; + = "Failed to activate table because of invalid metadata in ZooKeeper. 
Please DROP/DETACH table"; if constexpr (throw_on_error) throw Exception(ErrorCodes::INVALID_STATE, error_msg); else @@ -111,7 +111,15 @@ public: private: bool dropTable(zkutil::ZooKeeperPtr zookeeper, const zkutil::EphemeralNodeHolder::Ptr & metadata_drop_lock); - std::optional isTableValid() const; + enum class TableStatus : uint8_t + { + UNKNOWN, + INVALID_METADATA, + INVALID_KEEPER_STRUCTURE, + VALID + }; + + TableStatus getTableStatus() const; void restoreDataImpl( const BackupPtr & backup, @@ -143,7 +151,8 @@ private: mutable zkutil::ZooKeeperPtr zookeeper_client{nullptr}; mutable std::mutex init_mutex; - mutable std::optional table_is_valid; + + mutable TableStatus table_status{TableStatus::UNKNOWN}; LoggerPtr log; }; diff --git a/tests/integration/test_keeper_map/test.py b/tests/integration/test_keeper_map/test.py index 7aee5df5746..4b1bcd11cfe 100644 --- a/tests/integration/test_keeper_map/test.py +++ b/tests/integration/test_keeper_map/test.py @@ -67,6 +67,7 @@ def run_query(query): def test_keeper_map_without_zk(started_cluster): + run_query("DROP TABLE IF EXISTS test_keeper_map_without_zk SYNC") assert_keeper_exception_after_partition( "CREATE TABLE test_keeper_map_without_zk (key UInt64, value UInt64) ENGINE = KeeperMap('/test_keeper_map_without_zk') PRIMARY KEY(key);" ) @@ -107,12 +108,12 @@ def test_keeper_map_without_zk(started_cluster): ) assert "Failed to activate table because of invalid metadata in ZooKeeper" in error - node.query("DETACH TABLE test_keeper_map_without_zk") - client.stop() def test_keeper_map_with_failed_drop(started_cluster): + run_query("DROP TABLE IF EXISTS test_keeper_map_with_failed_drop SYNC") + run_query("DROP TABLE IF EXISTS test_keeper_map_with_failed_drop_another SYNC") run_query( "CREATE TABLE test_keeper_map_with_failed_drop (key UInt64, value UInt64) ENGINE = KeeperMap('/test_keeper_map_with_failed_drop') PRIMARY KEY(key);" ) From 406ac2279ecbfc24913548dfcf459c55dd450723 Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Wed, 31 Jul 2024 14:48:33 +0200 Subject: [PATCH 496/661] Analyzer: Do not traverse unresolved subtrees --- src/Planner/findParallelReplicasQuery.cpp | 12 +++++------- src/Planner/findQueryForParallelReplicas.h | 2 +- ...5_analyzer_replace_with_dummy_tables.reference | 0 .../03215_analyzer_replace_with_dummy_tables.sql | 15 +++++++++++++++ 4 files changed, 21 insertions(+), 8 deletions(-) create mode 100644 tests/queries/0_stateless/03215_analyzer_replace_with_dummy_tables.reference create mode 100644 tests/queries/0_stateless/03215_analyzer_replace_with_dummy_tables.sql diff --git a/src/Planner/findParallelReplicasQuery.cpp b/src/Planner/findParallelReplicasQuery.cpp index c89a70be541..1140f30ad9c 100644 --- a/src/Planner/findParallelReplicasQuery.cpp +++ b/src/Planner/findParallelReplicasQuery.cpp @@ -113,13 +113,13 @@ std::stack getSupportingParallelReplicasQuery(const IQueryTre return res; } -class ReplaceTableNodeToDummyVisitor : public InDepthQueryTreeVisitor +class ReplaceTableNodeToDummyVisitor : public InDepthQueryTreeVisitorWithContext { public: - using Base = InDepthQueryTreeVisitor; + using Base = InDepthQueryTreeVisitorWithContext; using Base::Base; - void visitImpl(const QueryTreeNodePtr & node) + void enterImpl(QueryTreeNodePtr & node) { auto * table_node = node->as(); auto * table_function_node = node->as(); @@ -134,21 +134,19 @@ public: ColumnsDescription(storage_snapshot->getColumns(get_column_options)), storage_snapshot); - auto dummy_table_node = std::make_shared(std::move(storage_dummy), context); + auto 
dummy_table_node = std::make_shared(std::move(storage_dummy), getContext()); dummy_table_node->setAlias(node->getAlias()); replacement_map.emplace(node.get(), std::move(dummy_table_node)); } } - ContextPtr context; std::unordered_map replacement_map; }; QueryTreeNodePtr replaceTablesWithDummyTables(const QueryTreeNodePtr & query, const ContextPtr & context) { - ReplaceTableNodeToDummyVisitor visitor; - visitor.context = context; + ReplaceTableNodeToDummyVisitor visitor(context); visitor.visit(query); return query->cloneAndReplace(visitor.replacement_map); diff --git a/src/Planner/findQueryForParallelReplicas.h b/src/Planner/findQueryForParallelReplicas.h index f5dc69dfa0e..cdce4ad0b47 100644 --- a/src/Planner/findQueryForParallelReplicas.h +++ b/src/Planner/findQueryForParallelReplicas.h @@ -13,7 +13,7 @@ using QueryTreeNodePtr = std::shared_ptr; struct SelectQueryOptions; -/// Find a qury which can be executed with parallel replicas up to WithMergableStage. +/// Find a query which can be executed with parallel replicas up to WithMergableStage. /// Returned query will always contain some (>1) subqueries, possibly with joins. const QueryNode * findQueryForParallelReplicas(const QueryTreeNodePtr & query_tree_node, SelectQueryOptions & select_query_options); diff --git a/tests/queries/0_stateless/03215_analyzer_replace_with_dummy_tables.reference b/tests/queries/0_stateless/03215_analyzer_replace_with_dummy_tables.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03215_analyzer_replace_with_dummy_tables.sql b/tests/queries/0_stateless/03215_analyzer_replace_with_dummy_tables.sql new file mode 100644 index 00000000000..12d2bd627a7 --- /dev/null +++ b/tests/queries/0_stateless/03215_analyzer_replace_with_dummy_tables.sql @@ -0,0 +1,15 @@ +create table t (number UInt64) engine MergeTree order by number; + +SELECT 1 +FROM +( + SELECT number IN ( + SELECT number + FROM view( + SELECT number + FROM numbers(1) + ) + ) + FROM t +) +SETTINGS allow_experimental_parallel_reading_from_replicas = 1, max_parallel_replicas = 2, allow_experimental_analyzer = 1; From 7160e954c16100e963371e416878837437569d74 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Wed, 31 Jul 2024 13:32:24 +0200 Subject: [PATCH 497/661] 02995_new_settings_history: Update ref --- ..._23_12_1.tsv => 02995_baseline_24_7_1.tsv} | 182 +++++++++++++++--- .../0_stateless/02995_new_settings_history.sh | 14 +- 2 files changed, 167 insertions(+), 29 deletions(-) rename tests/queries/0_stateless/{02995_baseline_23_12_1.tsv => 02995_baseline_24_7_1.tsv} (82%) diff --git a/tests/queries/0_stateless/02995_baseline_23_12_1.tsv b/tests/queries/0_stateless/02995_baseline_24_7_1.tsv similarity index 82% rename from tests/queries/0_stateless/02995_baseline_23_12_1.tsv rename to tests/queries/0_stateless/02995_baseline_24_7_1.tsv index a391473e7c9..6c830da8646 100644 --- a/tests/queries/0_stateless/02995_baseline_23_12_1.tsv +++ b/tests/queries/0_stateless/02995_baseline_24_7_1.tsv @@ -11,23 +11,28 @@ allow_create_index_without_type 0 allow_custom_error_code_in_throwif 0 allow_ddl 1 allow_deprecated_database_ordinary 0 +allow_deprecated_error_prone_window_functions 0 +allow_deprecated_snowflake_conversion_functions 0 allow_deprecated_syntax_for_merge_tree 0 allow_distributed_ddl 1 allow_drop_detached 0 allow_execute_multiif_columnar 1 allow_experimental_alter_materialized_view_structure 1 -allow_experimental_analyzer 0 +allow_experimental_analyzer 1 allow_experimental_annoy_index 0 
allow_experimental_bigint_types 1 allow_experimental_codecs 0 allow_experimental_database_atomic 1 allow_experimental_database_materialized_mysql 0 allow_experimental_database_materialized_postgresql 0 -allow_experimental_database_replicated 0 +allow_experimental_database_replicated 1 +allow_experimental_dynamic_type 0 +allow_experimental_full_text_index 0 allow_experimental_funnel_functions 0 allow_experimental_geo_types 1 allow_experimental_hash_functions 0 allow_experimental_inverted_index 0 +allow_experimental_join_condition 0 allow_experimental_lightweight_delete 1 allow_experimental_live_view 0 allow_experimental_map_type 1 @@ -40,12 +45,15 @@ allow_experimental_query_cache 1 allow_experimental_query_deduplication 0 allow_experimental_refreshable_materialized_view 0 allow_experimental_s3queue 1 -allow_experimental_shared_merge_tree 0 +allow_experimental_shared_merge_tree 1 +allow_experimental_statistic 0 allow_experimental_statistics 0 allow_experimental_undrop_table_query 1 allow_experimental_usearch_index 0 +allow_experimental_variant_type 0 allow_experimental_window_functions 1 allow_experimental_window_view 0 +allow_get_client_http_header 0 allow_hyperscan 1 allow_introspection_functions 0 allow_named_collection_override_by_default 1 @@ -58,17 +66,21 @@ allow_prefetched_read_pool_for_remote_filesystem 1 allow_push_predicate_when_subquery_contains_with 1 allow_settings_after_format_in_insert 0 allow_simdjson 1 +allow_statistic_optimize 0 allow_statistics_optimize 0 allow_suspicious_codecs 0 allow_suspicious_fixed_string_types 0 allow_suspicious_indices 0 allow_suspicious_low_cardinality_types 0 +allow_suspicious_primary_key 0 allow_suspicious_ttl_expressions 0 +allow_suspicious_variant_types 0 allow_unrestricted_reads_from_keeper 0 alter_move_to_space_execute_async 0 alter_partition_verbose_result 0 alter_sync 1 analyze_index_with_space_filling_curves 1 +analyzer_compatibility_join_using_top_level_identifier 0 annoy_index_search_k_nodes -1 any_join_distinct_right_table_keys 0 apply_deleted_mask 1 @@ -76,20 +88,42 @@ apply_mutations_on_fly 0 asterisk_include_alias_columns 0 asterisk_include_materialized_columns 0 async_insert 0 +async_insert_busy_timeout_decrease_rate 0.2 +async_insert_busy_timeout_increase_rate 0.2 +async_insert_busy_timeout_max_ms 200 +async_insert_busy_timeout_min_ms 50 async_insert_busy_timeout_ms 200 async_insert_cleanup_timeout_ms 1000 async_insert_deduplicate 0 -async_insert_max_data_size 1000000 +async_insert_max_data_size 10485760 async_insert_max_query_number 450 +async_insert_poll_timeout_ms 10 async_insert_stale_timeout_ms 0 async_insert_threads 16 +async_insert_use_adaptive_busy_timeout 1 async_query_sending_for_remote 1 async_socket_for_remote 1 +azure_allow_parallel_part_upload 1 azure_create_new_file_on_insert 0 +azure_ignore_file_doesnt_exist 0 azure_list_object_keys_size 1000 +azure_max_blocks_in_multipart_upload 50000 +azure_max_inflight_parts_for_one_file 20 +azure_max_single_part_copy_size 268435456 azure_max_single_part_upload_size 104857600 azure_max_single_read_retries 4 +azure_max_unexpected_write_error_retries 4 +azure_max_upload_part_size 5368709120 +azure_min_upload_part_size 16777216 +azure_sdk_max_retries 10 +azure_sdk_retry_initial_backoff_ms 10 +azure_sdk_retry_max_backoff_ms 1000 +azure_skip_empty_files 0 +azure_strict_upload_part_size 0 +azure_throw_on_zero_files_match 0 azure_truncate_on_insert 0 +azure_upload_part_size_multiply_factor 2 +azure_upload_part_size_multiply_parts_count_threshold 500 
background_buffer_flush_schedule_pool_size 16 background_common_pool_size 8 background_distributed_schedule_pool_size 16 @@ -107,6 +141,7 @@ backup_restore_keeper_max_retries 20 backup_restore_keeper_retry_initial_backoff_ms 100 backup_restore_keeper_retry_max_backoff_ms 5000 backup_restore_keeper_value_max_size 1048576 +backup_restore_s3_retry_attempts 1000 backup_threads 16 bool_false_representation false bool_true_representation true @@ -115,6 +150,7 @@ calculate_text_stack_trace 1 cancel_http_readonly_queries_on_client_close 0 cast_ipv4_ipv6_default_on_conversion_error 0 cast_keep_nullable 0 +cast_string_to_dynamic_use_inference 0 check_query_single_value_result 1 check_referential_table_dependencies 0 check_table_dependencies 1 @@ -123,6 +159,7 @@ cloud_mode 0 cloud_mode_engine 1 cluster_for_parallel_replicas collect_hash_table_stats_during_aggregation 1 +collect_hash_table_stats_during_joins 1 column_names_for_schema_inference compatibility compatibility_ignore_auto_increment_in_create_table 0 @@ -141,9 +178,12 @@ count_distinct_optimization 0 create_index_ignore_unique 0 create_replicated_merge_tree_fault_injection_probability 0 create_table_empty_primary_key_by_default 0 +cross_join_min_bytes_to_compress 1073741824 +cross_join_min_rows_to_compress 10000000 cross_to_inner_join_rewrite 1 data_type_default_nullable 0 database_atomic_wait_for_drop_and_detach_synchronously 0 +database_replicated_allow_heavy_create 0 database_replicated_allow_only_replicated_engine 0 database_replicated_allow_replicated_engine_arguments 1 database_replicated_always_detach_permanently 0 @@ -156,15 +196,19 @@ date_time_overflow_behavior ignore decimal_check_overflow 1 deduplicate_blocks_in_dependent_materialized_views 0 default_database_engine Atomic +default_materialized_view_sql_security DEFINER default_max_bytes_in_join 1000000000 -default_table_engine None +default_normal_view_sql_security INVOKER +default_table_engine MergeTree default_temporary_table_engine Memory +default_view_definer CURRENT_USER describe_compact_output 0 describe_extend_object_types 0 describe_include_subcolumns 0 describe_include_virtual_columns 0 dialect clickhouse dictionary_use_async_executor 0 +dictionary_validate_primary_key_type 0 distinct_overflow_mode throw distributed_aggregation_memory_efficient 1 distributed_background_insert_batch 0 @@ -182,6 +226,7 @@ distributed_directory_monitor_sleep_time_ms 100 distributed_directory_monitor_split_batch_on_failure 0 distributed_foreground_insert 0 distributed_group_by_no_merge 0 +distributed_insert_skip_read_only_replicas 0 distributed_product_mode deny distributed_push_down_limit 1 distributed_replica_error_cap 1000 @@ -191,6 +236,7 @@ do_not_merge_across_partitions_select_final 0 drain_timeout 3 empty_result_for_aggregation_by_constant_keys_on_empty_set 1 empty_result_for_aggregation_by_empty_set 0 +enable_blob_storage_log 1 enable_debug_queries 0 enable_deflate_qpl_codec 0 enable_early_constant_folding 1 @@ -205,6 +251,7 @@ enable_job_stack_trace 0 enable_lightweight_delete 1 enable_memory_bound_merging_of_aggregation_results 1 enable_multiple_prewhere_read_steps 1 +enable_named_columns_in_function_tuple 1 enable_optimize_predicate_expression 1 enable_optimize_predicate_expression_to_final_subquery 1 enable_order_by_all 1 @@ -216,7 +263,9 @@ enable_sharing_sets_for_mutations 1 enable_software_prefetch_in_aggregation 1 enable_unaligned_array_join 0 enable_url_encoding 1 +enable_vertical_final 1 enable_writes_to_query_cache 1 +enable_zstd_qat_codec 0 
engine_file_allow_create_multiple_files 0 engine_file_empty_if_not_exists 0 engine_file_skip_empty_files 0 @@ -231,10 +280,12 @@ external_storage_max_read_rows 0 external_storage_rw_timeout_sec 300 external_table_functions_use_nulls 1 external_table_strict_query 0 +extract_key_value_pairs_max_pairs_per_row 1000 extract_kvp_max_pairs_per_row 1000 extremes 0 fallback_to_stale_replicas_for_distributed_queries 1 filesystem_cache_max_download_size 137438953472 +filesystem_cache_reserve_space_wait_lock_timeout_milliseconds 1000 filesystem_cache_segments_batch_size 20 filesystem_prefetch_max_memory_usage 1073741824 filesystem_prefetch_min_bytes_for_single_read_task 2097152 @@ -278,7 +329,9 @@ format_regexp_escaping_rule Raw format_regexp_skip_unmatched 0 format_schema format_template_resultset +format_template_resultset_format format_template_row +format_template_row_format format_template_rows_between_delimiter \n format_tsv_null_representation \\N formatdatetime_f_prints_single_zero 0 @@ -288,8 +341,11 @@ fsync_metadata 1 function_implementation function_json_value_return_type_allow_complex 0 function_json_value_return_type_allow_nullable 0 +function_locate_has_mysql_compatible_argument_order 1 function_range_max_elements_in_block 500000000 function_sleep_max_microseconds_per_block 3000000 +function_visible_width_behavior 1 +geo_distance_returns_float64_on_float64_arguments 1 glob_expansion_max_elements 1000 grace_hash_join_initial_buckets 1 grace_hash_join_max_buckets 1024 @@ -300,8 +356,10 @@ group_by_use_nulls 0 handle_kafka_error_mode default handshake_timeout_ms 10000 hdfs_create_new_file_on_insert 0 +hdfs_ignore_file_doesnt_exist 0 hdfs_replication 0 hdfs_skip_empty_files 0 +hdfs_throw_on_zero_files_match 0 hdfs_truncate_on_insert 0 hedged_connection_timeout_ms 50 hsts_max_age 0 @@ -326,10 +384,14 @@ http_skip_not_found_url_for_globs 1 http_wait_end_of_query 0 http_write_exception_in_output_format 1 http_zlib_compression_level 3 +iceberg_engine_ignore_schema_evolution 0 idle_connection_timeout 3600 ignore_cold_parts_seconds 0 ignore_data_skipping_indices +ignore_drop_queries_probability 0 +ignore_materialized_views_with_dropped_target_table 0 ignore_on_cluster_for_replicated_access_entities_queries 0 +ignore_on_cluster_for_replicated_named_collections_queries 0 ignore_on_cluster_for_replicated_udf_queries 0 implicit_transaction 0 input_format_allow_errors_num 0 @@ -341,12 +403,14 @@ input_format_arrow_import_nested 0 input_format_arrow_skip_columns_with_unsupported_types_in_schema_inference 0 input_format_avro_allow_missing_fields 0 input_format_avro_null_as_default 0 +input_format_binary_decode_types_in_binary_format 0 input_format_bson_skip_fields_with_unsupported_types_in_schema_inference 0 input_format_capn_proto_skip_fields_with_unsupported_types_in_schema_inference 0 input_format_csv_allow_cr_end_of_line 0 input_format_csv_allow_variable_number_of_columns 0 input_format_csv_allow_whitespace_or_tab_as_delimiter 0 input_format_csv_arrays_as_nested_csv 0 +input_format_csv_deserialize_separate_columns_into_tuple 1 input_format_csv_detect_header 1 input_format_csv_empty_as_default 1 input_format_csv_enum_as_number 0 @@ -354,29 +418,37 @@ input_format_csv_skip_first_lines 0 input_format_csv_skip_trailing_empty_lines 0 input_format_csv_trim_whitespaces 1 input_format_csv_try_infer_numbers_from_strings 0 +input_format_csv_try_infer_strings_from_quoted_tuples 1 input_format_csv_use_best_effort_in_schema_inference 1 input_format_csv_use_default_on_bad_values 0 
input_format_custom_allow_variable_number_of_columns 0 input_format_custom_detect_header 1 input_format_custom_skip_trailing_empty_lines 0 input_format_defaults_for_omitted_fields 1 +input_format_force_null_for_omitted_fields 0 +input_format_hive_text_allow_variable_number_of_columns 1 input_format_hive_text_collection_items_delimiter  input_format_hive_text_fields_delimiter  input_format_hive_text_map_keys_delimiter  input_format_import_nested_json 0 input_format_ipv4_default_on_conversion_error 0 input_format_ipv6_default_on_conversion_error 0 +input_format_json_case_insensitive_column_matching 0 input_format_json_compact_allow_variable_number_of_columns 0 input_format_json_defaults_for_missing_elements_in_named_tuple 1 input_format_json_ignore_unknown_keys_in_named_tuple 1 +input_format_json_ignore_unnecessary_fields 1 input_format_json_infer_incomplete_types_as_strings 1 input_format_json_named_tuples_as_objects 1 input_format_json_read_arrays_as_strings 1 input_format_json_read_bools_as_numbers 1 +input_format_json_read_bools_as_strings 1 input_format_json_read_numbers_as_strings 1 input_format_json_read_objects_as_strings 1 +input_format_json_throw_on_bad_escape_sequence 1 input_format_json_try_infer_named_tuples_from_objects 1 input_format_json_try_infer_numbers_from_strings 0 +input_format_json_use_string_type_for_ambiguous_paths_in_named_tuples_inference_from_objects 0 input_format_json_validate_types_from_metadata 1 input_format_max_bytes_to_read_for_schema_inference 33554432 input_format_max_rows_to_read_for_schema_inference 25000 @@ -384,11 +456,13 @@ input_format_msgpack_number_of_columns 0 input_format_mysql_dump_map_column_names 1 input_format_mysql_dump_table_name input_format_native_allow_types_conversion 1 +input_format_native_decode_types_in_binary_format 0 input_format_null_as_default 1 input_format_orc_allow_missing_columns 1 input_format_orc_case_insensitive_column_matching 0 input_format_orc_filter_push_down 1 input_format_orc_import_nested 0 +input_format_orc_read_use_writer_time_zone 0 input_format_orc_row_batch_size 100000 input_format_orc_skip_columns_with_unsupported_types_in_schema_inference 0 input_format_orc_use_fast_decoder 1 @@ -398,17 +472,21 @@ input_format_parquet_case_insensitive_column_matching 0 input_format_parquet_filter_push_down 1 input_format_parquet_import_nested 0 input_format_parquet_local_file_min_bytes_for_seek 8192 -input_format_parquet_max_block_size 8192 +input_format_parquet_max_block_size 65409 +input_format_parquet_prefer_block_bytes 16744704 input_format_parquet_preserve_order 0 input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference 0 +input_format_parquet_use_native_reader 0 input_format_protobuf_flatten_google_wrappers 0 input_format_protobuf_skip_fields_with_unsupported_types_in_schema_inference 0 input_format_record_errors_file_path input_format_skip_unknown_fields 1 input_format_try_infer_dates 1 input_format_try_infer_datetimes 1 +input_format_try_infer_exponent_floats 0 input_format_try_infer_integers 1 input_format_tsv_allow_variable_number_of_columns 0 +input_format_tsv_crlf_end_of_line 0 input_format_tsv_detect_header 1 input_format_tsv_empty_as_default 0 input_format_tsv_enum_as_number 0 @@ -450,7 +528,12 @@ joined_subquery_requires_alias 1 kafka_disable_num_consumers_limit 0 kafka_max_wait_ms 5000 keeper_map_strict_mode 0 +keeper_max_retries 10 +keeper_retry_initial_backoff_ms 100 +keeper_retry_max_backoff_ms 5000 legacy_column_name_of_tuple_literal 0 +lightweight_deletes_sync 2 
+lightweight_mutation_projection_mode throw limit 0 live_view_heartbeat_interval 15 load_balancing random @@ -461,7 +544,7 @@ local_filesystem_read_prefetch 0 lock_acquire_timeout 120 log_comment log_formatted_queries 0 -log_processors_profiles 0 +log_processors_profiles 1 log_profile_events 1 log_queries 1 log_queries_cut_to_length 100000 @@ -474,6 +557,8 @@ log_query_views 1 low_cardinality_allow_in_native_format 1 low_cardinality_max_dictionary_size 8192 low_cardinality_use_single_dictionary_for_part 0 +materialize_skip_indexes_on_insert 1 +materialize_statistics_on_insert 1 materialize_ttl_after_modify 1 materialized_views_ignore_errors 0 max_alter_threads \'auto(16)\' @@ -501,6 +586,7 @@ max_distributed_depth 5 max_download_buffer_size 10485760 max_download_threads 4 max_entries_for_hash_table_stats 10000 +max_estimated_execution_time 0 max_execution_speed 0 max_execution_speed_bytes 0 max_execution_time 0 @@ -528,7 +614,9 @@ max_network_bandwidth_for_user 0 max_network_bytes 0 max_number_of_partitions_for_independent_aggregation 128 max_parallel_replicas 1 +max_parser_backtracks 1000000 max_parser_depth 1000 +max_parsing_threads \'auto(16)\' max_partition_size_to_drop 50000000000 max_partitions_per_insert_block 100 max_partitions_to_read -1 @@ -537,6 +625,7 @@ max_query_size 262144 max_read_buffer_size 1048576 max_read_buffer_size_local_fs 131072 max_read_buffer_size_remote_fs 0 +max_recursive_cte_evaluation_depth 1000 max_remote_read_network_bandwidth 0 max_remote_read_network_bandwidth_for_server 0 max_remote_write_network_bandwidth 0 @@ -549,7 +638,7 @@ max_result_rows 0 max_rows_in_distinct 0 max_rows_in_join 0 max_rows_in_set 0 -max_rows_in_set_to_optimize_join 100000 +max_rows_in_set_to_optimize_join 0 max_rows_to_group_by 0 max_rows_to_read 0 max_rows_to_read_leaf 0 @@ -557,6 +646,7 @@ max_rows_to_sort 0 max_rows_to_transfer 0 max_sessions_for_user 0 max_size_to_preallocate_for_aggregation 100000000 +max_size_to_preallocate_for_joins 100000000 max_streams_for_merge_tree_reading 0 max_streams_multiplier_for_merge_tables 5 max_streams_to_max_threads_ratio 1 @@ -592,6 +682,7 @@ merge_tree_min_bytes_per_task_for_remote_reading 4194304 merge_tree_min_rows_for_concurrent_read 163840 merge_tree_min_rows_for_concurrent_read_for_remote_filesystem 163840 merge_tree_min_rows_for_seek 0 +merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability 0 merge_tree_use_const_size_tasks_for_remote_reading 1 metrics_perf_events_enabled 0 metrics_perf_events_list @@ -604,6 +695,8 @@ min_count_to_compile_expression 3 min_count_to_compile_sort_description 3 min_execution_speed 0 min_execution_speed_bytes 0 +min_external_table_block_size_bytes 268402944 +min_external_table_block_size_rows 1048449 min_free_disk_space_for_temporary_data 0 min_hit_rate_to_use_consecutive_keys_optimization 0.5 min_insert_block_size_bytes 268402944 @@ -619,8 +712,8 @@ mutations_execute_subqueries_on_initiator 0 mutations_max_literal_size_to_replace 16384 mutations_sync 0 mysql_datatypes_support_level -mysql_map_fixed_string_to_text_in_show_columns 0 -mysql_map_string_to_text_in_show_columns 0 +mysql_map_fixed_string_to_text_in_show_columns 1 +mysql_map_string_to_text_in_show_columns 1 mysql_max_rows_to_insert 65536 network_compression_method LZ4 network_zstd_compression_level 1 @@ -647,6 +740,7 @@ optimize_group_by_constant_keys 1 optimize_group_by_function_keys 1 optimize_if_chain_to_multiif 0 optimize_if_transform_strings_to_enum 0 +optimize_injective_functions_in_group_by 1 
optimize_injective_functions_inside_uniq 1 optimize_min_equality_disjunction_chain_length 3 optimize_min_inequality_conjunction_chain_length 3 @@ -664,7 +758,7 @@ optimize_redundant_functions_in_order_by 1 optimize_respect_aliases 1 optimize_rewrite_aggregate_function_with_if 1 optimize_rewrite_array_exists_to_has 0 -optimize_rewrite_sum_if_to_count_if 0 +optimize_rewrite_sum_if_to_count_if 1 optimize_skip_merged_partitions 0 optimize_skip_unused_shards 0 optimize_skip_unused_shards_limit 1000 @@ -674,9 +768,10 @@ optimize_sorting_by_input_stream_properties 1 optimize_substitute_columns 0 optimize_syntax_fuse_functions 0 optimize_throw_if_noop 0 +optimize_time_filter_with_preimage 1 optimize_trivial_approximate_count_query 0 optimize_trivial_count_query 1 -optimize_trivial_insert_select 1 +optimize_trivial_insert_select 0 optimize_uniq_to_count 1 optimize_use_implicit_projections 1 optimize_use_projections 1 @@ -685,13 +780,19 @@ os_thread_priority 0 output_format_arrow_compression_method lz4_frame output_format_arrow_fixed_string_as_fixed_byte_array 1 output_format_arrow_low_cardinality_as_dictionary 0 -output_format_arrow_string_as_string 0 +output_format_arrow_string_as_string 1 +output_format_arrow_use_64_bit_indexes_for_dictionary 0 +output_format_arrow_use_signed_indexes_for_dictionary 1 output_format_avro_codec output_format_avro_rows_in_file 1 output_format_avro_string_column_pattern output_format_avro_sync_interval 16384 +output_format_binary_encode_types_in_binary_format 0 output_format_bson_string_as_string 0 +output_format_compression_level 3 +output_format_compression_zstd_window_log 0 output_format_csv_crlf_end_of_line 0 +output_format_csv_serialize_tuple_into_separate_columns 1 output_format_decimal_trailing_zeros 0 output_format_enable_streaming 0 output_format_json_array_of_rows 0 @@ -705,27 +806,34 @@ output_format_json_skip_null_value_in_named_tuples 0 output_format_json_validate_utf8 0 output_format_markdown_escape_special_characters 0 output_format_msgpack_uuid_representation ext -output_format_orc_compression_method lz4 +output_format_native_encode_types_in_binary_format 0 +output_format_orc_compression_method zstd output_format_orc_row_index_stride 10000 -output_format_orc_string_as_string 0 +output_format_orc_string_as_string 1 output_format_parallel_formatting 1 output_format_parquet_batch_size 1024 output_format_parquet_compliant_nested_types 1 -output_format_parquet_compression_method lz4 +output_format_parquet_compression_method zstd output_format_parquet_data_page_size 1048576 output_format_parquet_fixed_string_as_fixed_byte_array 1 output_format_parquet_parallel_encoding 1 output_format_parquet_row_group_size 1000000 output_format_parquet_row_group_size_bytes 536870912 -output_format_parquet_string_as_string 0 -output_format_parquet_use_custom_encoder 0 +output_format_parquet_string_as_string 1 +output_format_parquet_use_custom_encoder 1 output_format_parquet_version 2.latest -output_format_pretty_color 1 +output_format_parquet_write_page_index 1 +output_format_pretty_color auto +output_format_pretty_display_footer_column_names 1 +output_format_pretty_display_footer_column_names_min_rows 50 output_format_pretty_grid_charset UTF-8 +output_format_pretty_highlight_digit_groups 1 output_format_pretty_max_column_pad_width 250 output_format_pretty_max_rows 10000 output_format_pretty_max_value_width 10000 -output_format_pretty_row_numbers 0 +output_format_pretty_max_value_width_apply_for_single_value 0 +output_format_pretty_row_numbers 1 
+output_format_pretty_single_large_number_tip_threshold 1000000 output_format_protobuf_nullables_with_google_wrappers 0 output_format_schema output_format_sql_insert_include_column_names 1 @@ -734,15 +842,22 @@ output_format_sql_insert_quote_names 1 output_format_sql_insert_table_name table output_format_sql_insert_use_replace 0 output_format_tsv_crlf_end_of_line 0 +output_format_values_escape_quote_with_quote 0 output_format_write_statistics 1 +page_cache_inject_eviction 0 parallel_distributed_insert_select 0 parallel_replica_offset 0 +parallel_replicas_allow_in_with_subquery 1 parallel_replicas_count 0 parallel_replicas_custom_key parallel_replicas_custom_key_filter_type default +parallel_replicas_custom_key_range_lower 0 +parallel_replicas_custom_key_range_upper 0 parallel_replicas_for_non_replicated_merge_tree 0 +parallel_replicas_mark_segment_size 128 parallel_replicas_min_number_of_granules_to_enable 0 parallel_replicas_min_number_of_rows_per_replica 0 +parallel_replicas_prefer_local_join 1 parallel_replicas_single_task_marks_count_multiplier 2 parallel_view_processing 0 parallelize_output_from_storages 1 @@ -755,11 +870,14 @@ parts_to_delay_insert 0 parts_to_throw_insert 0 periodic_live_view_refresh 60 poll_interval 10 +postgresql_connection_attempt_timeout 2 postgresql_connection_pool_auto_close_connection 0 +postgresql_connection_pool_retries 2 postgresql_connection_pool_size 16 postgresql_connection_pool_wait_timeout 5000 precise_float_parsing 0 prefer_column_name_to_alias 0 +prefer_external_sort_block_bytes 16744704 prefer_global_in_and_join 0 prefer_localhost_replica 1 prefer_warmed_unmerged_parts_seconds 0 @@ -767,7 +885,7 @@ preferred_block_size_bytes 1000000 preferred_max_column_in_block_size_bytes 0 preferred_optimize_projection_name prefetch_buffer_size 1048576 -print_pretty_type_names 0 +print_pretty_type_names 1 priority 0 query_cache_compress_entries 1 query_cache_max_entries 0 @@ -778,8 +896,10 @@ query_cache_nondeterministic_function_handling throw query_cache_share_between_users 0 query_cache_squash_partial_results 1 query_cache_store_results_of_queries_with_nondeterministic_functions 0 +query_cache_system_table_handling throw query_cache_ttl 60 query_plan_aggregation_in_order 1 +query_plan_convert_outer_join_to_inner_join 1 query_plan_enable_multithreading_after_window_functions 1 query_plan_enable_optimizations 1 query_plan_execute_functions_after_sorting 1 @@ -788,6 +908,8 @@ query_plan_lift_up_array_join 1 query_plan_lift_up_union 1 query_plan_max_optimizations_to_apply 10000 query_plan_merge_expressions 1 +query_plan_merge_filters 0 +query_plan_optimize_prewhere 1 query_plan_optimize_primary_key 1 query_plan_optimize_projection 1 query_plan_push_down_limit 1 @@ -806,7 +928,9 @@ read_backoff_min_events 2 read_backoff_min_interval_between_events_ms 1000 read_backoff_min_latency_ms 1000 read_from_filesystem_cache_if_exists_otherwise_bypass_cache 0 +read_from_page_cache_if_exists_otherwise_bypass_cache 0 read_in_order_two_level_merge_threshold 100 +read_in_order_use_buffering 1 read_overflow_mode throw read_overflow_mode_leaf throw read_priority 0 @@ -835,17 +959,20 @@ result_overflow_mode throw rewrite_count_distinct_if_with_count_distinct_implementation 0 s3_allow_parallel_part_upload 1 s3_check_objects_after_upload 0 +s3_connect_timeout_ms 1000 s3_create_new_file_on_insert 0 s3_disable_checksum 0 -s3_http_connection_pool_size 1000 +s3_ignore_file_doesnt_exist 0 s3_list_object_keys_size 1000 s3_max_connections 1024 s3_max_get_burst 0 s3_max_get_rps 0 
s3_max_inflight_parts_for_one_file 20 +s3_max_part_number 10000 s3_max_put_burst 0 s3_max_put_rps 0 s3_max_redirects 10 +s3_max_single_operation_copy_size 33554432 s3_max_single_part_upload_size 33554432 s3_max_single_read_retries 4 s3_max_unexpected_write_error_retries 4 @@ -860,6 +987,8 @@ s3_truncate_on_insert 0 s3_upload_part_size_multiply_factor 2 s3_upload_part_size_multiply_parts_count_threshold 500 s3_use_adaptive_timeouts 1 +s3_validate_request_settings 1 +s3queue_allow_experimental_sharded_mode 0 s3queue_default_zookeeper_path /clickhouse/s3queue/ s3queue_enable_logging_to_s3queue_log 0 schema_inference_cache_require_modification_time_for_url 1 @@ -887,6 +1016,8 @@ sleep_after_receiving_query_ms 0 sleep_in_send_data_ms 0 sleep_in_send_tables_status_ms 0 sort_overflow_mode throw +split_intersecting_parts_ranges_into_layers_final 1 +split_parts_ranges_into_intersecting_and_non_intersecting_final 1 splitby_max_substrings_includes_remaining_string 0 stop_refreshable_materialized_views_on_startup 0 storage_file_read_method pread @@ -898,8 +1029,10 @@ stream_poll_timeout_ms 500 system_events_show_zero_values 0 table_function_remote_max_addresses 1000 tcp_keep_alive_timeout 290 +temporary_data_in_cache_reserve_space_wait_lock_timeout_milliseconds 600000 temporary_files_codec LZ4 temporary_live_view_timeout 1 +throw_if_deduplication_in_dependent_materialized_views_enabled_with_async_insert 1 throw_if_no_data_to_insert 1 throw_on_error_from_cache_on_write_operations 0 throw_on_max_partitions_per_insert_block 1 @@ -912,8 +1045,10 @@ totals_mode after_having_exclusive trace_profile_events 0 transfer_overflow_mode throw transform_null_in 0 +traverse_shadow_remote_data_paths 0 union_default_mode unknown_packet_in_send_data 0 +update_insert_deduplication_token_in_dependent_materialized_views 0 use_cache_for_count_from_files 1 use_client_time_zone 0 use_compact_format_in_distributed_parts_names 1 @@ -923,12 +1058,15 @@ use_index_for_in_with_subqueries 1 use_index_for_in_with_subqueries_max_values 0 use_local_cache_for_remote_storage 1 use_mysql_types_in_show_columns 0 +use_page_cache_for_disks_without_file_cache 0 use_query_cache 0 use_skip_indexes 1 use_skip_indexes_if_final 0 use_structure_from_insertion_table_in_table_functions 2 use_uncompressed_cache 0 +use_variant_as_common_type 0 use_with_fill_by_sorting_prefix 1 +validate_experimental_and_suspicious_types_inside_nested_types 1 validate_polygons 1 wait_changes_become_visible_after_commit_mode wait_unknown wait_for_async_insert 1 diff --git a/tests/queries/0_stateless/02995_new_settings_history.sh b/tests/queries/0_stateless/02995_new_settings_history.sh index 8de98c55b6a..917dacc04b0 100755 --- a/tests/queries/0_stateless/02995_new_settings_history.sh +++ b/tests/queries/0_stateless/02995_new_settings_history.sh @@ -7,12 +7,12 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . "$CUR_DIR"/../shell_config.sh # Note that this is a broad check. 
A per version check is done in the upgrade test -# Baseline generated with 23.12.1 -# clickhouse local --query "select name, default from system.settings order by name format TSV" > 02995_baseline_23_12_1.tsv +# Baseline generated with 24.7.1 +# clickhouse local --query "select name, default from system.settings order by name format TSV" > 02995_baseline_24_7_1.tsv $CLICKHOUSE_LOCAL --query " WITH old_settings AS ( - SELECT * FROM file('${CUR_DIR}/02995_baseline_23_12_1.tsv', 'TSV', 'name String, default String') + SELECT * FROM file('${CUR_DIR}/02995_baseline_24_7_1.tsv', 'TSV', 'name String, default String') ), new_settings AS ( @@ -21,7 +21,7 @@ $CLICKHOUSE_LOCAL --query " ) SELECT * FROM ( - SELECT 'PLEASE ADD THE NEW SETTING TO SettingsChangesHistory.h: ' || name || ' WAS ADDED', + SELECT 'PLEASE ADD THE NEW SETTING TO SettingsChangesHistory.cpp: ' || name || ' WAS ADDED', FROM new_settings WHERE (name NOT IN ( SELECT name @@ -29,17 +29,17 @@ $CLICKHOUSE_LOCAL --query " )) AND (name NOT IN ( SELECT arrayJoin(tupleElement(changes, 'name')) FROM system.settings_changes - WHERE splitByChar('.', version())[1] >= '24' + WHERE splitByChar('.', version)[1]::UInt64 >= 24 AND splitByChar('.', version)[2]::UInt64 > 7 )) UNION ALL ( - SELECT 'PLEASE ADD THE SETTING VALUE CHANGE TO SettingsChangesHistory.h: ' || name || ' WAS CHANGED FROM ' || old_settings.default || ' TO ' || new_settings.default, + SELECT 'PLEASE ADD THE SETTING VALUE CHANGE TO SettingsChangesHistory.cpp: ' || name || ' WAS CHANGED FROM ' || old_settings.default || ' TO ' || new_settings.default, FROM new_settings LEFT JOIN old_settings ON new_settings.name = old_settings.name WHERE (new_settings.default != old_settings.default) AND (name NOT IN ( SELECT arrayJoin(tupleElement(changes, 'name')) FROM system.settings_changes - WHERE splitByChar('.', version())[1] >= '24' + WHERE splitByChar('.', version)[1]::UInt64 >= 24 AND splitByChar('.', version)[2]::UInt64 > 7 )) ) ) From b178eea09ec80fed40b5043ccf1635d95b9cf19b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Wed, 31 Jul 2024 14:59:17 +0200 Subject: [PATCH 498/661] Fix broken settings --- src/Core/SettingsChangesHistory.cpp | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index 9faf77e9087..ecc558e64d7 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -57,6 +57,16 @@ String ClickHouseVersion::toString() const /// Note: please check if the key already exists to prevent duplicate entries. 
static std::initializer_list> settings_changes_history_initializer = { + {"24.12",{ + }}, + {"24.11",{ + }}, + {"24.10",{ + }}, + {"24.9", { + }}, + {"24.8", {{"merge_tree_min_bytes_per_task_for_remote_reading", 4194304, 2097152, "Value is unified with `filesystem_prefetch_min_bytes_for_single_read_task`"}, + }}, {"24.7", {{"output_format_parquet_write_page_index", false, true, "Add a possibility to write page index into parquet files."}, {"output_format_binary_encode_types_in_binary_format", false, false, "Added new setting to allow to write type names in binary format in RowBinaryWithNamesAndTypes output format"}, {"input_format_binary_decode_types_in_binary_format", false, false, "Added new setting to allow to read type names in binary format in RowBinaryWithNamesAndTypes input format"}, @@ -76,7 +86,6 @@ static std::initializer_list Date: Wed, 31 Jul 2024 15:11:55 +0200 Subject: [PATCH 499/661] Try a less conflict prone format --- src/Core/SettingsChangesHistory.cpp | 709 +++++++++++++++++----------- 1 file changed, 441 insertions(+), 268 deletions(-) diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index ecc558e64d7..21c89b3c5c5 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -57,274 +57,447 @@ String ClickHouseVersion::toString() const /// Note: please check if the key already exists to prevent duplicate entries. static std::initializer_list> settings_changes_history_initializer = { - {"24.12",{ - }}, - {"24.11",{ - }}, - {"24.10",{ - }}, - {"24.9", { - }}, - {"24.8", {{"merge_tree_min_bytes_per_task_for_remote_reading", 4194304, 2097152, "Value is unified with `filesystem_prefetch_min_bytes_for_single_read_task`"}, - }}, - {"24.7", {{"output_format_parquet_write_page_index", false, true, "Add a possibility to write page index into parquet files."}, - {"output_format_binary_encode_types_in_binary_format", false, false, "Added new setting to allow to write type names in binary format in RowBinaryWithNamesAndTypes output format"}, - {"input_format_binary_decode_types_in_binary_format", false, false, "Added new setting to allow to read type names in binary format in RowBinaryWithNamesAndTypes input format"}, - {"output_format_native_encode_types_in_binary_format", false, false, "Added new setting to allow to write type names in binary format in Native output format"}, - {"input_format_native_decode_types_in_binary_format", false, false, "Added new setting to allow to read type names in binary format in Native output format"}, - {"read_in_order_use_buffering", false, true, "Use buffering before merging while reading in order of primary key"}, - {"enable_named_columns_in_function_tuple", false, true, "Generate named tuples in function tuple() when all names are unique and can be treated as unquoted identifiers."}, - {"input_format_json_case_insensitive_column_matching", false, false, "Ignore case when matching JSON keys with CH columns."}, - {"optimize_trivial_insert_select", true, false, "The optimization does not make sense in many cases."}, - {"dictionary_validate_primary_key_type", false, false, "Validate primary key type for dictionaries. 
By default id type for simple layouts will be implicitly converted to UInt64."}, - {"collect_hash_table_stats_during_joins", false, true, "New setting."}, - {"max_size_to_preallocate_for_joins", 0, 100'000'000, "New setting."}, - {"input_format_orc_read_use_writer_time_zone", false, false, "Whether use the writer's time zone in ORC stripe for ORC row reader, the default ORC row reader's time zone is GMT."}, - {"lightweight_mutation_projection_mode", "throw", "throw", "When lightweight delete happens on a table with projection(s), the possible operations include throw the exception as projection exists, or drop all projection related to this table then do lightweight delete."}, - {"database_replicated_allow_heavy_create", true, false, "Long-running DDL queries (CREATE AS SELECT and POPULATE) for Replicated database engine was forbidden"}, - {"query_plan_merge_filters", false, false, "Allow to merge filters in the query plan"}, - {"azure_sdk_max_retries", 10, 10, "Maximum number of retries in azure sdk"}, - {"azure_sdk_retry_initial_backoff_ms", 10, 10, "Minimal backoff between retries in azure sdk"}, - {"azure_sdk_retry_max_backoff_ms", 1000, 1000, "Maximal backoff between retries in azure sdk"}, - {"ignore_on_cluster_for_replicated_named_collections_queries", false, false, "Ignore ON CLUSTER clause for replicated named collections management queries."}, - {"backup_restore_s3_retry_attempts", 1000,1000, "Setting for Aws::Client::RetryStrategy, Aws::Client does retries itself, 0 means no retries. It takes place only for backup/restore."}, - {"postgresql_connection_attempt_timeout", 2, 2, "Allow to control 'connect_timeout' parameter of PostgreSQL connection."}, - {"postgresql_connection_pool_retries", 2, 2, "Allow to control the number of retries in PostgreSQL connection pool."} - }}, - {"24.6", {{"materialize_skip_indexes_on_insert", true, true, "Added new setting to allow to disable materialization of skip indexes on insert"}, - {"materialize_statistics_on_insert", true, true, "Added new setting to allow to disable materialization of statistics on insert"}, - {"input_format_parquet_use_native_reader", false, false, "When reading Parquet files, to use native reader instead of arrow reader."}, - {"hdfs_throw_on_zero_files_match", false, false, "Allow to throw an error when ListObjects request cannot match any files in HDFS engine instead of empty query result"}, - {"azure_throw_on_zero_files_match", false, false, "Allow to throw an error when ListObjects request cannot match any files in AzureBlobStorage engine instead of empty query result"}, - {"s3_validate_request_settings", true, true, "Allow to disable S3 request settings validation"}, - {"allow_experimental_full_text_index", false, false, "Enable experimental full-text index"}, - {"azure_skip_empty_files", false, false, "Allow to skip empty files in azure table engine"}, - {"hdfs_ignore_file_doesnt_exist", false, false, "Allow to return 0 rows when the requested files don't exist instead of throwing an exception in HDFS table engine"}, - {"azure_ignore_file_doesnt_exist", false, false, "Allow to return 0 rows when the requested files don't exist instead of throwing an exception in AzureBlobStorage table engine"}, - {"s3_ignore_file_doesnt_exist", false, false, "Allow to return 0 rows when the requested files don't exist instead of throwing an exception in S3 table engine"}, - {"s3_max_part_number", 10000, 10000, "Maximum part number number for s3 upload part"}, - {"s3_max_single_operation_copy_size", 32 * 1024 * 1024, 32 * 1024 * 1024, 
"Maximum size for a single copy operation in s3"}, - {"input_format_parquet_max_block_size", 8192, DEFAULT_BLOCK_SIZE, "Increase block size for parquet reader."}, - {"input_format_parquet_prefer_block_bytes", 0, DEFAULT_BLOCK_SIZE * 256, "Average block bytes output by parquet reader."}, - {"enable_blob_storage_log", true, true, "Write information about blob storage operations to system.blob_storage_log table"}, - {"allow_deprecated_snowflake_conversion_functions", true, false, "Disabled deprecated functions snowflakeToDateTime[64] and dateTime[64]ToSnowflake."}, - {"allow_statistic_optimize", false, false, "Old setting which popped up here being renamed."}, - {"allow_experimental_statistic", false, false, "Old setting which popped up here being renamed."}, - {"allow_statistics_optimize", false, false, "The setting was renamed. The previous name is `allow_statistic_optimize`."}, - {"allow_experimental_statistics", false, false, "The setting was renamed. The previous name is `allow_experimental_statistic`."}, - {"enable_vertical_final", false, true, "Enable vertical final by default again after fixing bug"}, - {"parallel_replicas_custom_key_range_lower", 0, 0, "Add settings to control the range filter when using parallel replicas with dynamic shards"}, - {"parallel_replicas_custom_key_range_upper", 0, 0, "Add settings to control the range filter when using parallel replicas with dynamic shards. A value of 0 disables the upper limit"}, - {"output_format_pretty_display_footer_column_names", 0, 1, "Add a setting to display column names in the footer if there are many rows. Threshold value is controlled by output_format_pretty_display_footer_column_names_min_rows."}, - {"output_format_pretty_display_footer_column_names_min_rows", 0, 50, "Add a setting to control the threshold value for setting output_format_pretty_display_footer_column_names_min_rows. Default 50."}, - {"output_format_csv_serialize_tuple_into_separate_columns", true, true, "A new way of how interpret tuples in CSV format was added."}, - {"input_format_csv_deserialize_separate_columns_into_tuple", true, true, "A new way of how interpret tuples in CSV format was added."}, - {"input_format_csv_try_infer_strings_from_quoted_tuples", true, true, "A new way of how interpret tuples in CSV format was added."}, - }}, - {"24.5", {{"allow_deprecated_error_prone_window_functions", true, false, "Allow usage of deprecated error prone window functions (neighbor, runningAccumulate, runningDifferenceStartingWithFirstValue, runningDifference)"}, - {"allow_experimental_join_condition", false, false, "Support join with inequal conditions which involve columns from both left and right table. e.g. t1.y < t2.y."}, - {"input_format_tsv_crlf_end_of_line", false, false, "Enables reading of CRLF line endings with TSV formats"}, - {"output_format_parquet_use_custom_encoder", false, true, "Enable custom Parquet encoder."}, - {"cross_join_min_rows_to_compress", 0, 10000000, "Minimal count of rows to compress block in CROSS JOIN. Zero value means - disable this threshold. This block is compressed when any of the two thresholds (by rows or by bytes) are reached."}, - {"cross_join_min_bytes_to_compress", 0, 1_GiB, "Minimal size of block to compress in CROSS JOIN. Zero value means - disable this threshold. 
This block is compressed when any of the two thresholds (by rows or by bytes) are reached."}, - {"http_max_chunk_size", 0, 0, "Internal limitation"}, - {"prefer_external_sort_block_bytes", 0, DEFAULT_BLOCK_SIZE * 256, "Prefer maximum block bytes for external sort, reduce the memory usage during merging."}, - {"input_format_force_null_for_omitted_fields", false, false, "Disable type-defaults for omitted fields when needed"}, - {"cast_string_to_dynamic_use_inference", false, false, "Add setting to allow converting String to Dynamic through parsing"}, - {"allow_experimental_dynamic_type", false, false, "Add new experimental Dynamic type"}, - {"azure_max_blocks_in_multipart_upload", 50000, 50000, "Maximum number of blocks in multipart upload for Azure."}, - }}, - {"24.4", {{"input_format_json_throw_on_bad_escape_sequence", true, true, "Allow to save JSON strings with bad escape sequences"}, - {"max_parsing_threads", 0, 0, "Add a separate setting to control number of threads in parallel parsing from files"}, - {"ignore_drop_queries_probability", 0, 0, "Allow to ignore drop queries in server with specified probability for testing purposes"}, - {"lightweight_deletes_sync", 2, 2, "The same as 'mutation_sync', but controls only execution of lightweight deletes"}, - {"query_cache_system_table_handling", "save", "throw", "The query cache no longer caches results of queries against system tables"}, - {"input_format_json_ignore_unnecessary_fields", false, true, "Ignore unnecessary fields and not parse them. Enabling this may not throw exceptions on json strings of invalid format or with duplicated fields"}, - {"input_format_hive_text_allow_variable_number_of_columns", false, true, "Ignore extra columns in Hive Text input (if file has more columns than expected) and treat missing fields in Hive Text input as default values."}, - {"allow_experimental_database_replicated", false, true, "Database engine Replicated is now in Beta stage"}, - {"temporary_data_in_cache_reserve_space_wait_lock_timeout_milliseconds", (10 * 60 * 1000), (10 * 60 * 1000), "Wait time to lock cache for sapce reservation in temporary data in filesystem cache"}, - {"optimize_rewrite_sum_if_to_count_if", false, true, "Only available for the analyzer, where it works correctly"}, - {"azure_allow_parallel_part_upload", "true", "true", "Use multiple threads for azure multipart upload."}, - {"max_recursive_cte_evaluation_depth", DBMS_RECURSIVE_CTE_MAX_EVALUATION_DEPTH, DBMS_RECURSIVE_CTE_MAX_EVALUATION_DEPTH, "Maximum limit on recursive CTE evaluation depth"}, - {"query_plan_convert_outer_join_to_inner_join", false, true, "Allow to convert OUTER JOIN to INNER JOIN if filter after JOIN always filters default values"}, - }}, - {"24.3", {{"s3_connect_timeout_ms", 1000, 1000, "Introduce new dedicated setting for s3 connection timeout"}, - {"allow_experimental_shared_merge_tree", false, true, "The setting is obsolete"}, - {"use_page_cache_for_disks_without_file_cache", false, false, "Added userspace page cache"}, - {"read_from_page_cache_if_exists_otherwise_bypass_cache", false, false, "Added userspace page cache"}, - {"page_cache_inject_eviction", false, false, "Added userspace page cache"}, - {"default_table_engine", "None", "MergeTree", "Set default table engine to MergeTree for better usability"}, - {"input_format_json_use_string_type_for_ambiguous_paths_in_named_tuples_inference_from_objects", false, false, "Allow to use String type for ambiguous paths during named tuple inference from JSON objects"}, - {"traverse_shadow_remote_data_paths", 
false, false, "Traverse shadow directory when query system.remote_data_paths."}, - {"throw_if_deduplication_in_dependent_materialized_views_enabled_with_async_insert", false, true, "Deduplication in dependent materialized view cannot work together with async inserts."}, - {"parallel_replicas_allow_in_with_subquery", false, true, "If true, subquery for IN will be executed on every follower replica"}, - {"log_processors_profiles", false, true, "Enable by default"}, - {"function_locate_has_mysql_compatible_argument_order", false, true, "Increase compatibility with MySQL's locate function."}, - {"allow_suspicious_primary_key", true, false, "Forbid suspicious PRIMARY KEY/ORDER BY for MergeTree (i.e. SimpleAggregateFunction)"}, - {"filesystem_cache_reserve_space_wait_lock_timeout_milliseconds", 1000, 1000, "Wait time to lock cache for sapce reservation in filesystem cache"}, - {"max_parser_backtracks", 0, 1000000, "Limiting the complexity of parsing"}, - {"analyzer_compatibility_join_using_top_level_identifier", false, false, "Force to resolve identifier in JOIN USING from projection"}, - {"distributed_insert_skip_read_only_replicas", false, false, "If true, INSERT into Distributed will skip read-only replicas"}, - {"keeper_max_retries", 10, 10, "Max retries for general keeper operations"}, - {"keeper_retry_initial_backoff_ms", 100, 100, "Initial backoff timeout for general keeper operations"}, - {"keeper_retry_max_backoff_ms", 5000, 5000, "Max backoff timeout for general keeper operations"}, - {"s3queue_allow_experimental_sharded_mode", false, false, "Enable experimental sharded mode of S3Queue table engine. It is experimental because it will be rewritten"}, - {"allow_experimental_analyzer", false, true, "Enable analyzer and planner by default."}, - {"merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability", 0.0, 0.0, "For testing of `PartsSplitter` - split read ranges into intersecting and non intersecting every time you read from MergeTree with the specified probability."}, - {"allow_get_client_http_header", false, false, "Introduced a new function."}, - {"output_format_pretty_row_numbers", false, true, "It is better for usability."}, - {"output_format_pretty_max_value_width_apply_for_single_value", true, false, "Single values in Pretty formats won't be cut."}, - {"output_format_parquet_string_as_string", false, true, "ClickHouse allows arbitrary binary data in the String data type, which is typically UTF-8. Parquet/ORC/Arrow Strings only support UTF-8. That's why you can choose which Arrow's data type to use for the ClickHouse String data type - String or Binary. While Binary would be more correct and compatible, using String by default will correspond to user expectations in most cases."}, - {"output_format_orc_string_as_string", false, true, "ClickHouse allows arbitrary binary data in the String data type, which is typically UTF-8. Parquet/ORC/Arrow Strings only support UTF-8. That's why you can choose which Arrow's data type to use for the ClickHouse String data type - String or Binary. While Binary would be more correct and compatible, using String by default will correspond to user expectations in most cases."}, - {"output_format_arrow_string_as_string", false, true, "ClickHouse allows arbitrary binary data in the String data type, which is typically UTF-8. Parquet/ORC/Arrow Strings only support UTF-8. That's why you can choose which Arrow's data type to use for the ClickHouse String data type - String or Binary. 
While Binary would be more correct and compatible, using String by default will correspond to user expectations in most cases."}, - {"output_format_parquet_compression_method", "lz4", "zstd", "Parquet/ORC/Arrow support many compression methods, including lz4 and zstd. ClickHouse supports each and every compression method. Some inferior tools, such as 'duckdb', lack support for the faster `lz4` compression method, that's why we set zstd by default."}, - {"output_format_orc_compression_method", "lz4", "zstd", "Parquet/ORC/Arrow support many compression methods, including lz4 and zstd. ClickHouse supports each and every compression method. Some inferior tools, such as 'duckdb', lack support for the faster `lz4` compression method, that's why we set zstd by default."}, - {"output_format_pretty_highlight_digit_groups", false, true, "If enabled and if output is a terminal, highlight every digit corresponding to the number of thousands, millions, etc. with underline."}, - {"geo_distance_returns_float64_on_float64_arguments", false, true, "Increase the default precision."}, - {"azure_max_inflight_parts_for_one_file", 20, 20, "The maximum number of a concurrent loaded parts in multipart upload request. 0 means unlimited."}, - {"azure_strict_upload_part_size", 0, 0, "The exact size of part to upload during multipart upload to Azure blob storage."}, - {"azure_min_upload_part_size", 16*1024*1024, 16*1024*1024, "The minimum size of part to upload during multipart upload to Azure blob storage."}, - {"azure_max_upload_part_size", 5ull*1024*1024*1024, 5ull*1024*1024*1024, "The maximum size of part to upload during multipart upload to Azure blob storage."}, - {"azure_upload_part_size_multiply_factor", 2, 2, "Multiply azure_min_upload_part_size by this factor each time azure_multiply_parts_count_threshold parts were uploaded from a single write to Azure blob storage."}, - {"azure_upload_part_size_multiply_parts_count_threshold", 500, 500, "Each time this number of parts was uploaded to Azure blob storage, azure_min_upload_part_size is multiplied by azure_upload_part_size_multiply_factor."}, - {"output_format_csv_serialize_tuple_into_separate_columns", true, true, "A new way of how interpret tuples in CSV format was added."}, - {"input_format_csv_deserialize_separate_columns_into_tuple", true, true, "A new way of how interpret tuples in CSV format was added."}, - {"input_format_csv_try_infer_strings_from_quoted_tuples", true, true, "A new way of how interpret tuples in CSV format was added."}, - }}, - {"24.2", {{"allow_suspicious_variant_types", true, false, "Don't allow creating Variant type with suspicious variants by default"}, - {"validate_experimental_and_suspicious_types_inside_nested_types", false, true, "Validate usage of experimental and suspicious types inside nested types"}, - {"output_format_values_escape_quote_with_quote", false, false, "If true escape ' with '', otherwise quoted with \\'"}, - {"output_format_pretty_single_large_number_tip_threshold", 0, 1'000'000, "Print a readable number tip on the right side of the table if the block consists of a single number which exceeds this value (except 0)"}, - {"input_format_try_infer_exponent_floats", true, false, "Don't infer floats in exponential notation by default"}, - {"query_plan_optimize_prewhere", true, true, "Allow to push down filter to PREWHERE expression for supported storages"}, - {"async_insert_max_data_size", 1000000, 10485760, "The previous value appeared to be too small."}, - {"async_insert_poll_timeout_ms", 10, 10, "Timeout in 
milliseconds for polling data from asynchronous insert queue"}, - {"async_insert_use_adaptive_busy_timeout", false, true, "Use adaptive asynchronous insert timeout"}, - {"async_insert_busy_timeout_min_ms", 50, 50, "The minimum value of the asynchronous insert timeout in milliseconds; it also serves as the initial value, which may be increased later by the adaptive algorithm"}, - {"async_insert_busy_timeout_max_ms", 200, 200, "The minimum value of the asynchronous insert timeout in milliseconds; async_insert_busy_timeout_ms is aliased to async_insert_busy_timeout_max_ms"}, - {"async_insert_busy_timeout_increase_rate", 0.2, 0.2, "The exponential growth rate at which the adaptive asynchronous insert timeout increases"}, - {"async_insert_busy_timeout_decrease_rate", 0.2, 0.2, "The exponential growth rate at which the adaptive asynchronous insert timeout decreases"}, - {"format_template_row_format", "", "", "Template row format string can be set directly in query"}, - {"format_template_resultset_format", "", "", "Template result set format string can be set in query"}, - {"split_parts_ranges_into_intersecting_and_non_intersecting_final", true, true, "Allow to split parts ranges into intersecting and non intersecting during FINAL optimization"}, - {"split_intersecting_parts_ranges_into_layers_final", true, true, "Allow to split intersecting parts ranges into layers during FINAL optimization"}, - {"azure_max_single_part_copy_size", 256*1024*1024, 256*1024*1024, "The maximum size of object to copy using single part copy to Azure blob storage."}, - {"min_external_table_block_size_rows", DEFAULT_INSERT_BLOCK_SIZE, DEFAULT_INSERT_BLOCK_SIZE, "Squash blocks passed to external table to specified size in rows, if blocks are not big enough"}, - {"min_external_table_block_size_bytes", DEFAULT_INSERT_BLOCK_SIZE * 256, DEFAULT_INSERT_BLOCK_SIZE * 256, "Squash blocks passed to external table to specified size in bytes, if blocks are not big enough."}, - {"parallel_replicas_prefer_local_join", true, true, "If true, and JOIN can be executed with parallel replicas algorithm, and all storages of right JOIN part are *MergeTree, local JOIN will be used instead of GLOBAL JOIN."}, - {"optimize_time_filter_with_preimage", true, true, "Optimize Date and DateTime predicates by converting functions into equivalent comparisons without conversions (e.g. toYear(col) = 2023 -> col >= '2023-01-01' AND col <= '2023-12-31')"}, - {"extract_key_value_pairs_max_pairs_per_row", 0, 0, "Max number of pairs that can be produced by the `extractKeyValuePairs` function. 
Used as a safeguard against consuming too much memory."}, - {"default_view_definer", "CURRENT_USER", "CURRENT_USER", "Allows to set default `DEFINER` option while creating a view"}, - {"default_materialized_view_sql_security", "DEFINER", "DEFINER", "Allows to set a default value for SQL SECURITY option when creating a materialized view"}, - {"default_normal_view_sql_security", "INVOKER", "INVOKER", "Allows to set default `SQL SECURITY` option while creating a normal view"}, - {"mysql_map_string_to_text_in_show_columns", false, true, "Reduce the configuration effort to connect ClickHouse with BI tools."}, - {"mysql_map_fixed_string_to_text_in_show_columns", false, true, "Reduce the configuration effort to connect ClickHouse with BI tools."}, - }}, - {"24.1", {{"print_pretty_type_names", false, true, "Better user experience."}, - {"input_format_json_read_bools_as_strings", false, true, "Allow to read bools as strings in JSON formats by default"}, - {"output_format_arrow_use_signed_indexes_for_dictionary", false, true, "Use signed indexes type for Arrow dictionaries by default as it's recommended"}, - {"allow_experimental_variant_type", false, false, "Add new experimental Variant type"}, - {"use_variant_as_common_type", false, false, "Allow to use Variant in if/multiIf if there is no common type"}, - {"output_format_arrow_use_64_bit_indexes_for_dictionary", false, false, "Allow to use 64 bit indexes type in Arrow dictionaries"}, - {"parallel_replicas_mark_segment_size", 128, 128, "Add new setting to control segment size in new parallel replicas coordinator implementation"}, - {"ignore_materialized_views_with_dropped_target_table", false, false, "Add new setting to allow to ignore materialized views with dropped target table"}, - {"output_format_compression_level", 3, 3, "Allow to change compression level in the query output"}, - {"output_format_compression_zstd_window_log", 0, 0, "Allow to change zstd window log in the query output when zstd compression is used"}, - {"enable_zstd_qat_codec", false, false, "Add new ZSTD_QAT codec"}, - {"enable_vertical_final", false, true, "Use vertical final by default"}, - {"output_format_arrow_use_64_bit_indexes_for_dictionary", false, false, "Allow to use 64 bit indexes type in Arrow dictionaries"}, - {"max_rows_in_set_to_optimize_join", 100000, 0, "Disable join optimization as it prevents from read in order optimization"}, - {"output_format_pretty_color", true, "auto", "Setting is changed to allow also for auto value, disabling ANSI escapes if output is not a tty"}, - {"function_visible_width_behavior", 0, 1, "We changed the default behavior of `visibleWidth` to be more precise"}, - {"max_estimated_execution_time", 0, 0, "Separate max_execution_time and max_estimated_execution_time"}, - {"iceberg_engine_ignore_schema_evolution", false, false, "Allow to ignore schema evolution in Iceberg table engine"}, - {"optimize_injective_functions_in_group_by", false, true, "Replace injective functions by it's arguments in GROUP BY section in analyzer"}, - {"update_insert_deduplication_token_in_dependent_materialized_views", false, false, "Allow to update insert deduplication token with table identifier during insert in dependent materialized views"}, - {"azure_max_unexpected_write_error_retries", 4, 4, "The maximum number of retries in case of unexpected errors during Azure blob storage write"}, - {"split_parts_ranges_into_intersecting_and_non_intersecting_final", false, true, "Allow to split parts ranges into intersecting and non intersecting during FINAL 
optimization"}, - {"split_intersecting_parts_ranges_into_layers_final", true, true, "Allow to split intersecting parts ranges into layers during FINAL optimization"}}}, - {"23.12", {{"allow_suspicious_ttl_expressions", true, false, "It is a new setting, and in previous versions the behavior was equivalent to allowing."}, - {"input_format_parquet_allow_missing_columns", false, true, "Allow missing columns in Parquet files by default"}, - {"input_format_orc_allow_missing_columns", false, true, "Allow missing columns in ORC files by default"}, - {"input_format_arrow_allow_missing_columns", false, true, "Allow missing columns in Arrow files by default"}}}, - {"23.11", {{"parsedatetime_parse_without_leading_zeros", false, true, "Improved compatibility with MySQL DATE_FORMAT/STR_TO_DATE"}}}, - {"23.9", {{"optimize_group_by_constant_keys", false, true, "Optimize group by constant keys by default"}, - {"input_format_json_try_infer_named_tuples_from_objects", false, true, "Try to infer named Tuples from JSON objects by default"}, - {"input_format_json_read_numbers_as_strings", false, true, "Allow to read numbers as strings in JSON formats by default"}, - {"input_format_json_read_arrays_as_strings", false, true, "Allow to read arrays as strings in JSON formats by default"}, - {"input_format_json_infer_incomplete_types_as_strings", false, true, "Allow to infer incomplete types as Strings in JSON formats by default"}, - {"input_format_json_try_infer_numbers_from_strings", true, false, "Don't infer numbers from strings in JSON formats by default to prevent possible parsing errors"}, - {"http_write_exception_in_output_format", false, true, "Output valid JSON/XML on exception in HTTP streaming."}}}, - {"23.8", {{"rewrite_count_distinct_if_with_count_distinct_implementation", false, true, "Rewrite countDistinctIf with count_distinct_implementation configuration"}}}, - {"23.7", {{"function_sleep_max_microseconds_per_block", 0, 3000000, "In previous versions, the maximum sleep time of 3 seconds was applied only for `sleep`, but not for `sleepEachRow` function. In the new version, we introduce this setting. If you set compatibility with the previous versions, we will disable the limit altogether."}}}, - {"23.6", {{"http_send_timeout", 180, 30, "3 minutes seems crazy long. Note that this is timeout for a single network write call, not for the whole upload operation."}, - {"http_receive_timeout", 180, 30, "See http_send_timeout."}}}, - {"23.5", {{"input_format_parquet_preserve_order", true, false, "Allow Parquet reader to reorder rows for better parallelism."}, - {"parallelize_output_from_storages", false, true, "Allow parallelism when executing queries that read from file/url/s3/etc. This may reorder rows."}, - {"use_with_fill_by_sorting_prefix", false, true, "Columns preceding WITH FILL columns in ORDER BY clause form sorting prefix. 
Rows with different values in sorting prefix are filled independently"}, - {"output_format_parquet_compliant_nested_types", false, true, "Change an internal field name in output Parquet file schema."}}}, - {"23.4", {{"allow_suspicious_indices", true, false, "If true, index can defined with identical expressions"}, - {"allow_nonconst_timezone_arguments", true, false, "Allow non-const timezone arguments in certain time-related functions like toTimeZone(), fromUnixTimestamp*(), snowflakeToDateTime*()."}, - {"connect_timeout_with_failover_ms", 50, 1000, "Increase default connect timeout because of async connect"}, - {"connect_timeout_with_failover_secure_ms", 100, 1000, "Increase default secure connect timeout because of async connect"}, - {"hedged_connection_timeout_ms", 100, 50, "Start new connection in hedged requests after 50 ms instead of 100 to correspond with previous connect timeout"}, - {"formatdatetime_f_prints_single_zero", true, false, "Improved compatibility with MySQL DATE_FORMAT()/STR_TO_DATE()"}, - {"formatdatetime_parsedatetime_m_is_month_name", false, true, "Improved compatibility with MySQL DATE_FORMAT/STR_TO_DATE"}}}, - {"23.3", {{"output_format_parquet_version", "1.0", "2.latest", "Use latest Parquet format version for output format"}, - {"input_format_json_ignore_unknown_keys_in_named_tuple", false, true, "Improve parsing JSON objects as named tuples"}, - {"input_format_native_allow_types_conversion", false, true, "Allow types conversion in Native input forma"}, - {"output_format_arrow_compression_method", "none", "lz4_frame", "Use lz4 compression in Arrow output format by default"}, - {"output_format_parquet_compression_method", "snappy", "lz4", "Use lz4 compression in Parquet output format by default"}, - {"output_format_orc_compression_method", "none", "lz4_frame", "Use lz4 compression in ORC output format by default"}, - {"async_query_sending_for_remote", false, true, "Create connections and send query async across shards"}}}, - {"23.2", {{"output_format_parquet_fixed_string_as_fixed_byte_array", false, true, "Use Parquet FIXED_LENGTH_BYTE_ARRAY type for FixedString by default"}, - {"output_format_arrow_fixed_string_as_fixed_byte_array", false, true, "Use Arrow FIXED_SIZE_BINARY type for FixedString by default"}, - {"query_plan_remove_redundant_distinct", false, true, "Remove redundant Distinct step in query plan"}, - {"optimize_duplicate_order_by_and_distinct", true, false, "Remove duplicate ORDER BY and DISTINCT if it's possible"}, - {"insert_keeper_max_retries", 0, 20, "Enable reconnections to Keeper on INSERT, improve reliability"}}}, - {"23.1", {{"input_format_json_read_objects_as_strings", 0, 1, "Enable reading nested json objects as strings while object type is experimental"}, - {"input_format_json_defaults_for_missing_elements_in_named_tuple", false, true, "Allow missing elements in JSON objects while reading named tuples by default"}, - {"input_format_csv_detect_header", false, true, "Detect header in CSV format by default"}, - {"input_format_tsv_detect_header", false, true, "Detect header in TSV format by default"}, - {"input_format_custom_detect_header", false, true, "Detect header in CustomSeparated format by default"}, - {"query_plan_remove_redundant_sorting", false, true, "Remove redundant sorting in query plan. 
For example, sorting steps related to ORDER BY clauses in subqueries"}}}, - {"22.12", {{"max_size_to_preallocate_for_aggregation", 10'000'000, 100'000'000, "This optimizes performance"}, - {"query_plan_aggregation_in_order", 0, 1, "Enable some refactoring around query plan"}, - {"format_binary_max_string_size", 0, 1_GiB, "Prevent allocating large amount of memory"}}}, - {"22.11", {{"use_structure_from_insertion_table_in_table_functions", 0, 2, "Improve using structure from insertion table in table functions"}}}, - {"22.9", {{"force_grouping_standard_compatibility", false, true, "Make GROUPING function output the same as in SQL standard and other DBMS"}}}, - {"22.7", {{"cross_to_inner_join_rewrite", 1, 2, "Force rewrite comma join to inner"}, - {"enable_positional_arguments", false, true, "Enable positional arguments feature by default"}, - {"format_csv_allow_single_quotes", true, false, "Most tools don't treat single quote in CSV specially, don't do it by default too"}}}, - {"22.6", {{"output_format_json_named_tuples_as_objects", false, true, "Allow to serialize named tuples as JSON objects in JSON formats by default"}, - {"input_format_skip_unknown_fields", false, true, "Optimize reading subset of columns for some input formats"}}}, - {"22.5", {{"memory_overcommit_ratio_denominator", 0, 1073741824, "Enable memory overcommit feature by default"}, - {"memory_overcommit_ratio_denominator_for_user", 0, 1073741824, "Enable memory overcommit feature by default"}}}, - {"22.4", {{"allow_settings_after_format_in_insert", true, false, "Do not allow SETTINGS after FORMAT for INSERT queries because ClickHouse interpret SETTINGS as some values, which is misleading"}}}, - {"22.3", {{"cast_ipv4_ipv6_default_on_conversion_error", true, false, "Make functions cast(value, 'IPv4') and cast(value, 'IPv6') behave same as toIPv4 and toIPv6 functions"}}}, - {"21.12", {{"stream_like_engine_allow_direct_select", true, false, "Do not allow direct select for Kafka/RabbitMQ/FileLog by default"}}}, - {"21.9", {{"output_format_decimal_trailing_zeros", true, false, "Do not output trailing zeros in text representation of Decimal types by default for better looking output"}, - {"use_hedged_requests", false, true, "Enable Hedged Requests feature by default"}}}, - {"21.7", {{"legacy_column_name_of_tuple_literal", true, false, "Add this setting only for compatibility reasons. It makes sense to set to 'true', while doing rolling update of cluster from version lower than 21.7 to higher"}}}, - {"21.5", {{"async_socket_for_remote", false, true, "Fix all problems and turn on asynchronous reads from socket for remote queries by default again"}}}, - {"21.3", {{"async_socket_for_remote", true, false, "Turn off asynchronous reads from socket for remote queries because of some problems"}, - {"optimize_normalize_count_variants", false, true, "Rewrite aggregate functions that semantically equals to count() as count() by default"}, - {"normalize_function_names", false, true, "Normalize function names to their canonical names, this was needed for projection query routing"}}}, - {"21.2", {{"enable_global_with_statement", false, true, "Propagate WITH statements to UNION queries and all subqueries by default"}}}, - {"21.1", {{"insert_quorum_parallel", false, true, "Use parallel quorum inserts by default. 
It is significantly more convenient to use than sequential quorum inserts"}, - {"input_format_null_as_default", false, true, "Allow to insert NULL as default for input formats by default"}, - {"optimize_on_insert", false, true, "Enable data optimization on INSERT by default for better user experience"}, - {"use_compact_format_in_distributed_parts_names", false, true, "Use compact format for async INSERT into Distributed tables by default"}}}, - {"20.10", {{"format_regexp_escaping_rule", "Escaped", "Raw", "Use Raw as default escaping rule for Regexp format to male the behaviour more like to what users expect"}}}, - {"20.7", {{"show_table_uuid_in_table_create_query_if_not_nil", true, false, "Stop showing UID of the table in its CREATE query for Engine=Atomic"}}}, - {"20.5", {{"input_format_with_names_use_header", false, true, "Enable using header with names for formats with WithNames/WithNamesAndTypes suffixes"}, - {"allow_suspicious_codecs", true, false, "Don't allow to specify meaningless compression codecs"}}}, - {"20.4", {{"validate_polygons", false, true, "Throw exception if polygon is invalid in function pointInPolygon by default instead of returning possibly wrong results"}}}, - {"19.18", {{"enable_scalar_subquery_optimization", false, true, "Prevent scalar subqueries from (de)serializing large scalar values and possibly avoid running the same subquery more than once"}}}, - {"19.14", {{"any_join_distinct_right_table_keys", true, false, "Disable ANY RIGHT and ANY FULL JOINs by default to avoid inconsistency"}}}, - {"19.12", {{"input_format_defaults_for_omitted_fields", false, true, "Enable calculation of complex default expressions for omitted fields for some input formats, because it should be the expected behaviour"}}}, - {"19.5", {{"max_partitions_per_insert_block", 0, 100, "Add a limit for the number of partitions in one block"}}}, - {"18.12.17", {{"enable_optimize_predicate_expression", 0, 1, "Optimize predicates to subqueries by default"}}}, + {"24.12", + { + } + }, + {"24.11", + { + } + }, + {"24.10", + { + } + }, + {"24.9", + { + } + }, + {"24.8", + { + {"merge_tree_min_bytes_per_task_for_remote_reading", 4194304, 2097152, "Value is unified with `filesystem_prefetch_min_bytes_for_single_read_task`"}, + } + }, + {"24.7", + { + {"output_format_parquet_write_page_index", false, true, "Add a possibility to write page index into parquet files."}, + {"output_format_binary_encode_types_in_binary_format", false, false, "Added new setting to allow to write type names in binary format in RowBinaryWithNamesAndTypes output format"}, + {"input_format_binary_decode_types_in_binary_format", false, false, "Added new setting to allow to read type names in binary format in RowBinaryWithNamesAndTypes input format"}, + {"output_format_native_encode_types_in_binary_format", false, false, "Added new setting to allow to write type names in binary format in Native output format"}, + {"input_format_native_decode_types_in_binary_format", false, false, "Added new setting to allow to read type names in binary format in Native output format"}, + {"read_in_order_use_buffering", false, true, "Use buffering before merging while reading in order of primary key"}, + {"enable_named_columns_in_function_tuple", false, true, "Generate named tuples in function tuple() when all names are unique and can be treated as unquoted identifiers."}, + {"input_format_json_case_insensitive_column_matching", false, false, "Ignore case when matching JSON keys with CH columns."}, + {"optimize_trivial_insert_select", true, false, 
"The optimization does not make sense in many cases."}, + {"dictionary_validate_primary_key_type", false, false, "Validate primary key type for dictionaries. By default id type for simple layouts will be implicitly converted to UInt64."}, + {"collect_hash_table_stats_during_joins", false, true, "New setting."}, + {"max_size_to_preallocate_for_joins", 0, 100'000'000, "New setting."}, + {"input_format_orc_read_use_writer_time_zone", false, false, "Whether use the writer's time zone in ORC stripe for ORC row reader, the default ORC row reader's time zone is GMT."}, + {"lightweight_mutation_projection_mode", "throw", "throw", "When lightweight delete happens on a table with projection(s), the possible operations include throw the exception as projection exists, or drop all projection related to this table then do lightweight delete."}, + {"database_replicated_allow_heavy_create", true, false, "Long-running DDL queries (CREATE AS SELECT and POPULATE) for Replicated database engine was forbidden"}, + {"query_plan_merge_filters", false, false, "Allow to merge filters in the query plan"}, + {"azure_sdk_max_retries", 10, 10, "Maximum number of retries in azure sdk"}, + {"azure_sdk_retry_initial_backoff_ms", 10, 10, "Minimal backoff between retries in azure sdk"}, + {"azure_sdk_retry_max_backoff_ms", 1000, 1000, "Maximal backoff between retries in azure sdk"}, + {"ignore_on_cluster_for_replicated_named_collections_queries", false, false, "Ignore ON CLUSTER clause for replicated named collections management queries."}, + {"backup_restore_s3_retry_attempts", 1000,1000, "Setting for Aws::Client::RetryStrategy, Aws::Client does retries itself, 0 means no retries. It takes place only for backup/restore."}, + {"postgresql_connection_attempt_timeout", 2, 2, "Allow to control 'connect_timeout' parameter of PostgreSQL connection."}, + {"postgresql_connection_pool_retries", 2, 2, "Allow to control the number of retries in PostgreSQL connection pool."} + } + }, + {"24.6", + { + {"materialize_skip_indexes_on_insert", true, true, "Added new setting to allow to disable materialization of skip indexes on insert"}, + {"materialize_statistics_on_insert", true, true, "Added new setting to allow to disable materialization of statistics on insert"}, + {"input_format_parquet_use_native_reader", false, false, "When reading Parquet files, to use native reader instead of arrow reader."}, + {"hdfs_throw_on_zero_files_match", false, false, "Allow to throw an error when ListObjects request cannot match any files in HDFS engine instead of empty query result"}, + {"azure_throw_on_zero_files_match", false, false, "Allow to throw an error when ListObjects request cannot match any files in AzureBlobStorage engine instead of empty query result"}, + {"s3_validate_request_settings", true, true, "Allow to disable S3 request settings validation"}, + {"allow_experimental_full_text_index", false, false, "Enable experimental full-text index"}, + {"azure_skip_empty_files", false, false, "Allow to skip empty files in azure table engine"}, + {"hdfs_ignore_file_doesnt_exist", false, false, "Allow to return 0 rows when the requested files don't exist instead of throwing an exception in HDFS table engine"}, + {"azure_ignore_file_doesnt_exist", false, false, "Allow to return 0 rows when the requested files don't exist instead of throwing an exception in AzureBlobStorage table engine"}, + {"s3_ignore_file_doesnt_exist", false, false, "Allow to return 0 rows when the requested files don't exist instead of throwing an exception in S3 table 
engine"}, + {"s3_max_part_number", 10000, 10000, "Maximum part number number for s3 upload part"}, + {"s3_max_single_operation_copy_size", 32 * 1024 * 1024, 32 * 1024 * 1024, "Maximum size for a single copy operation in s3"}, + {"input_format_parquet_max_block_size", 8192, DEFAULT_BLOCK_SIZE, "Increase block size for parquet reader."}, + {"input_format_parquet_prefer_block_bytes", 0, DEFAULT_BLOCK_SIZE * 256, "Average block bytes output by parquet reader."}, + {"enable_blob_storage_log", true, true, "Write information about blob storage operations to system.blob_storage_log table"}, + {"allow_deprecated_snowflake_conversion_functions", true, false, "Disabled deprecated functions snowflakeToDateTime[64] and dateTime[64]ToSnowflake."}, + {"allow_statistic_optimize", false, false, "Old setting which popped up here being renamed."}, + {"allow_experimental_statistic", false, false, "Old setting which popped up here being renamed."}, + {"allow_statistics_optimize", false, false, "The setting was renamed. The previous name is `allow_statistic_optimize`."}, + {"allow_experimental_statistics", false, false, "The setting was renamed. The previous name is `allow_experimental_statistic`."}, + {"enable_vertical_final", false, true, "Enable vertical final by default again after fixing bug"}, + {"parallel_replicas_custom_key_range_lower", 0, 0, "Add settings to control the range filter when using parallel replicas with dynamic shards"}, + {"parallel_replicas_custom_key_range_upper", 0, 0, "Add settings to control the range filter when using parallel replicas with dynamic shards. A value of 0 disables the upper limit"}, + {"output_format_pretty_display_footer_column_names", 0, 1, "Add a setting to display column names in the footer if there are many rows. Threshold value is controlled by output_format_pretty_display_footer_column_names_min_rows."}, + {"output_format_pretty_display_footer_column_names_min_rows", 0, 50, "Add a setting to control the threshold value for setting output_format_pretty_display_footer_column_names_min_rows. Default 50."}, + {"output_format_csv_serialize_tuple_into_separate_columns", true, true, "A new way of how interpret tuples in CSV format was added."}, + {"input_format_csv_deserialize_separate_columns_into_tuple", true, true, "A new way of how interpret tuples in CSV format was added."}, + {"input_format_csv_try_infer_strings_from_quoted_tuples", true, true, "A new way of how interpret tuples in CSV format was added."}, + } + }, + {"24.5", + { + {"allow_deprecated_error_prone_window_functions", true, false, "Allow usage of deprecated error prone window functions (neighbor, runningAccumulate, runningDifferenceStartingWithFirstValue, runningDifference)"}, + {"allow_experimental_join_condition", false, false, "Support join with inequal conditions which involve columns from both left and right table. e.g. t1.y < t2.y."}, + {"input_format_tsv_crlf_end_of_line", false, false, "Enables reading of CRLF line endings with TSV formats"}, + {"output_format_parquet_use_custom_encoder", false, true, "Enable custom Parquet encoder."}, + {"cross_join_min_rows_to_compress", 0, 10000000, "Minimal count of rows to compress block in CROSS JOIN. Zero value means - disable this threshold. This block is compressed when any of the two thresholds (by rows or by bytes) are reached."}, + {"cross_join_min_bytes_to_compress", 0, 1_GiB, "Minimal size of block to compress in CROSS JOIN. Zero value means - disable this threshold. 
This block is compressed when any of the two thresholds (by rows or by bytes) are reached."}, + {"http_max_chunk_size", 0, 0, "Internal limitation"}, + {"prefer_external_sort_block_bytes", 0, DEFAULT_BLOCK_SIZE * 256, "Prefer maximum block bytes for external sort, reduce the memory usage during merging."}, + {"input_format_force_null_for_omitted_fields", false, false, "Disable type-defaults for omitted fields when needed"}, + {"cast_string_to_dynamic_use_inference", false, false, "Add setting to allow converting String to Dynamic through parsing"}, + {"allow_experimental_dynamic_type", false, false, "Add new experimental Dynamic type"}, + {"azure_max_blocks_in_multipart_upload", 50000, 50000, "Maximum number of blocks in multipart upload for Azure."}, + } + }, + {"24.4", + { + {"input_format_json_throw_on_bad_escape_sequence", true, true, "Allow to save JSON strings with bad escape sequences"}, + {"max_parsing_threads", 0, 0, "Add a separate setting to control number of threads in parallel parsing from files"}, + {"ignore_drop_queries_probability", 0, 0, "Allow to ignore drop queries in server with specified probability for testing purposes"}, + {"lightweight_deletes_sync", 2, 2, "The same as 'mutation_sync', but controls only execution of lightweight deletes"}, + {"query_cache_system_table_handling", "save", "throw", "The query cache no longer caches results of queries against system tables"}, + {"input_format_json_ignore_unnecessary_fields", false, true, "Ignore unnecessary fields and not parse them. Enabling this may not throw exceptions on json strings of invalid format or with duplicated fields"}, + {"input_format_hive_text_allow_variable_number_of_columns", false, true, "Ignore extra columns in Hive Text input (if file has more columns than expected) and treat missing fields in Hive Text input as default values."}, + {"allow_experimental_database_replicated", false, true, "Database engine Replicated is now in Beta stage"}, + {"temporary_data_in_cache_reserve_space_wait_lock_timeout_milliseconds", (10 * 60 * 1000), (10 * 60 * 1000), "Wait time to lock cache for sapce reservation in temporary data in filesystem cache"}, + {"optimize_rewrite_sum_if_to_count_if", false, true, "Only available for the analyzer, where it works correctly"}, + {"azure_allow_parallel_part_upload", "true", "true", "Use multiple threads for azure multipart upload."}, + {"max_recursive_cte_evaluation_depth", DBMS_RECURSIVE_CTE_MAX_EVALUATION_DEPTH, DBMS_RECURSIVE_CTE_MAX_EVALUATION_DEPTH, "Maximum limit on recursive CTE evaluation depth"}, + {"query_plan_convert_outer_join_to_inner_join", false, true, "Allow to convert OUTER JOIN to INNER JOIN if filter after JOIN always filters default values"}, + } + }, + {"24.3", + { + {"s3_connect_timeout_ms", 1000, 1000, "Introduce new dedicated setting for s3 connection timeout"}, + {"allow_experimental_shared_merge_tree", false, true, "The setting is obsolete"}, + {"use_page_cache_for_disks_without_file_cache", false, false, "Added userspace page cache"}, + {"read_from_page_cache_if_exists_otherwise_bypass_cache", false, false, "Added userspace page cache"}, + {"page_cache_inject_eviction", false, false, "Added userspace page cache"}, + {"default_table_engine", "None", "MergeTree", "Set default table engine to MergeTree for better usability"}, + {"input_format_json_use_string_type_for_ambiguous_paths_in_named_tuples_inference_from_objects", false, false, "Allow to use String type for ambiguous paths during named tuple inference from JSON objects"}, + 
{"traverse_shadow_remote_data_paths", false, false, "Traverse shadow directory when query system.remote_data_paths."}, + {"throw_if_deduplication_in_dependent_materialized_views_enabled_with_async_insert", false, true, "Deduplication in dependent materialized view cannot work together with async inserts."}, + {"parallel_replicas_allow_in_with_subquery", false, true, "If true, subquery for IN will be executed on every follower replica"}, + {"log_processors_profiles", false, true, "Enable by default"}, + {"function_locate_has_mysql_compatible_argument_order", false, true, "Increase compatibility with MySQL's locate function."}, + {"allow_suspicious_primary_key", true, false, "Forbid suspicious PRIMARY KEY/ORDER BY for MergeTree (i.e. SimpleAggregateFunction)"}, + {"filesystem_cache_reserve_space_wait_lock_timeout_milliseconds", 1000, 1000, "Wait time to lock cache for sapce reservation in filesystem cache"}, + {"max_parser_backtracks", 0, 1000000, "Limiting the complexity of parsing"}, + {"analyzer_compatibility_join_using_top_level_identifier", false, false, "Force to resolve identifier in JOIN USING from projection"}, + {"distributed_insert_skip_read_only_replicas", false, false, "If true, INSERT into Distributed will skip read-only replicas"}, + {"keeper_max_retries", 10, 10, "Max retries for general keeper operations"}, + {"keeper_retry_initial_backoff_ms", 100, 100, "Initial backoff timeout for general keeper operations"}, + {"keeper_retry_max_backoff_ms", 5000, 5000, "Max backoff timeout for general keeper operations"}, + {"s3queue_allow_experimental_sharded_mode", false, false, "Enable experimental sharded mode of S3Queue table engine. It is experimental because it will be rewritten"}, + {"allow_experimental_analyzer", false, true, "Enable analyzer and planner by default."}, + {"merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability", 0.0, 0.0, "For testing of `PartsSplitter` - split read ranges into intersecting and non intersecting every time you read from MergeTree with the specified probability."}, + {"allow_get_client_http_header", false, false, "Introduced a new function."}, + {"output_format_pretty_row_numbers", false, true, "It is better for usability."}, + {"output_format_pretty_max_value_width_apply_for_single_value", true, false, "Single values in Pretty formats won't be cut."}, + {"output_format_parquet_string_as_string", false, true, "ClickHouse allows arbitrary binary data in the String data type, which is typically UTF-8. Parquet/ORC/Arrow Strings only support UTF-8. That's why you can choose which Arrow's data type to use for the ClickHouse String data type - String or Binary. While Binary would be more correct and compatible, using String by default will correspond to user expectations in most cases."}, + {"output_format_orc_string_as_string", false, true, "ClickHouse allows arbitrary binary data in the String data type, which is typically UTF-8. Parquet/ORC/Arrow Strings only support UTF-8. That's why you can choose which Arrow's data type to use for the ClickHouse String data type - String or Binary. While Binary would be more correct and compatible, using String by default will correspond to user expectations in most cases."}, + {"output_format_arrow_string_as_string", false, true, "ClickHouse allows arbitrary binary data in the String data type, which is typically UTF-8. Parquet/ORC/Arrow Strings only support UTF-8. That's why you can choose which Arrow's data type to use for the ClickHouse String data type - String or Binary. 
While Binary would be more correct and compatible, using String by default will correspond to user expectations in most cases."}, + {"output_format_parquet_compression_method", "lz4", "zstd", "Parquet/ORC/Arrow support many compression methods, including lz4 and zstd. ClickHouse supports each and every compression method. Some inferior tools, such as 'duckdb', lack support for the faster `lz4` compression method, that's why we set zstd by default."}, + {"output_format_orc_compression_method", "lz4", "zstd", "Parquet/ORC/Arrow support many compression methods, including lz4 and zstd. ClickHouse supports each and every compression method. Some inferior tools, such as 'duckdb', lack support for the faster `lz4` compression method, that's why we set zstd by default."}, + {"output_format_pretty_highlight_digit_groups", false, true, "If enabled and if output is a terminal, highlight every digit corresponding to the number of thousands, millions, etc. with underline."}, + {"geo_distance_returns_float64_on_float64_arguments", false, true, "Increase the default precision."}, + {"azure_max_inflight_parts_for_one_file", 20, 20, "The maximum number of a concurrent loaded parts in multipart upload request. 0 means unlimited."}, + {"azure_strict_upload_part_size", 0, 0, "The exact size of part to upload during multipart upload to Azure blob storage."}, + {"azure_min_upload_part_size", 16*1024*1024, 16*1024*1024, "The minimum size of part to upload during multipart upload to Azure blob storage."}, + {"azure_max_upload_part_size", 5ull*1024*1024*1024, 5ull*1024*1024*1024, "The maximum size of part to upload during multipart upload to Azure blob storage."}, + {"azure_upload_part_size_multiply_factor", 2, 2, "Multiply azure_min_upload_part_size by this factor each time azure_multiply_parts_count_threshold parts were uploaded from a single write to Azure blob storage."}, + {"azure_upload_part_size_multiply_parts_count_threshold", 500, 500, "Each time this number of parts was uploaded to Azure blob storage, azure_min_upload_part_size is multiplied by azure_upload_part_size_multiply_factor."}, + {"output_format_csv_serialize_tuple_into_separate_columns", true, true, "A new way of how interpret tuples in CSV format was added."}, + {"input_format_csv_deserialize_separate_columns_into_tuple", true, true, "A new way of how interpret tuples in CSV format was added."}, + {"input_format_csv_try_infer_strings_from_quoted_tuples", true, true, "A new way of how interpret tuples in CSV format was added."}, + } + }, + {"24.2", + { + {"allow_suspicious_variant_types", true, false, "Don't allow creating Variant type with suspicious variants by default"}, + {"validate_experimental_and_suspicious_types_inside_nested_types", false, true, "Validate usage of experimental and suspicious types inside nested types"}, + {"output_format_values_escape_quote_with_quote", false, false, "If true escape ' with '', otherwise quoted with \\'"}, + {"output_format_pretty_single_large_number_tip_threshold", 0, 1'000'000, "Print a readable number tip on the right side of the table if the block consists of a single number which exceeds this value (except 0)"}, + {"input_format_try_infer_exponent_floats", true, false, "Don't infer floats in exponential notation by default"}, + {"query_plan_optimize_prewhere", true, true, "Allow to push down filter to PREWHERE expression for supported storages"}, + {"async_insert_max_data_size", 1000000, 10485760, "The previous value appeared to be too small."}, + {"async_insert_poll_timeout_ms", 10, 10, "Timeout 
in milliseconds for polling data from asynchronous insert queue"}, + {"async_insert_use_adaptive_busy_timeout", false, true, "Use adaptive asynchronous insert timeout"}, + {"async_insert_busy_timeout_min_ms", 50, 50, "The minimum value of the asynchronous insert timeout in milliseconds; it also serves as the initial value, which may be increased later by the adaptive algorithm"}, + {"async_insert_busy_timeout_max_ms", 200, 200, "The minimum value of the asynchronous insert timeout in milliseconds; async_insert_busy_timeout_ms is aliased to async_insert_busy_timeout_max_ms"}, + {"async_insert_busy_timeout_increase_rate", 0.2, 0.2, "The exponential growth rate at which the adaptive asynchronous insert timeout increases"}, + {"async_insert_busy_timeout_decrease_rate", 0.2, 0.2, "The exponential growth rate at which the adaptive asynchronous insert timeout decreases"}, + {"format_template_row_format", "", "", "Template row format string can be set directly in query"}, + {"format_template_resultset_format", "", "", "Template result set format string can be set in query"}, + {"split_parts_ranges_into_intersecting_and_non_intersecting_final", true, true, "Allow to split parts ranges into intersecting and non intersecting during FINAL optimization"}, + {"split_intersecting_parts_ranges_into_layers_final", true, true, "Allow to split intersecting parts ranges into layers during FINAL optimization"}, + {"azure_max_single_part_copy_size", 256*1024*1024, 256*1024*1024, "The maximum size of object to copy using single part copy to Azure blob storage."}, + {"min_external_table_block_size_rows", DEFAULT_INSERT_BLOCK_SIZE, DEFAULT_INSERT_BLOCK_SIZE, "Squash blocks passed to external table to specified size in rows, if blocks are not big enough"}, + {"min_external_table_block_size_bytes", DEFAULT_INSERT_BLOCK_SIZE * 256, DEFAULT_INSERT_BLOCK_SIZE * 256, "Squash blocks passed to external table to specified size in bytes, if blocks are not big enough."}, + {"parallel_replicas_prefer_local_join", true, true, "If true, and JOIN can be executed with parallel replicas algorithm, and all storages of right JOIN part are *MergeTree, local JOIN will be used instead of GLOBAL JOIN."}, + {"optimize_time_filter_with_preimage", true, true, "Optimize Date and DateTime predicates by converting functions into equivalent comparisons without conversions (e.g. toYear(col) = 2023 -> col >= '2023-01-01' AND col <= '2023-12-31')"}, + {"extract_key_value_pairs_max_pairs_per_row", 0, 0, "Max number of pairs that can be produced by the `extractKeyValuePairs` function. 
Used as a safeguard against consuming too much memory."}, + {"default_view_definer", "CURRENT_USER", "CURRENT_USER", "Allows to set default `DEFINER` option while creating a view"}, + {"default_materialized_view_sql_security", "DEFINER", "DEFINER", "Allows to set a default value for SQL SECURITY option when creating a materialized view"}, + {"default_normal_view_sql_security", "INVOKER", "INVOKER", "Allows to set default `SQL SECURITY` option while creating a normal view"}, + {"mysql_map_string_to_text_in_show_columns", false, true, "Reduce the configuration effort to connect ClickHouse with BI tools."}, + {"mysql_map_fixed_string_to_text_in_show_columns", false, true, "Reduce the configuration effort to connect ClickHouse with BI tools."}, + } + }, + {"24.1", + { + {"print_pretty_type_names", false, true, "Better user experience."}, + {"input_format_json_read_bools_as_strings", false, true, "Allow to read bools as strings in JSON formats by default"}, + {"output_format_arrow_use_signed_indexes_for_dictionary", false, true, "Use signed indexes type for Arrow dictionaries by default as it's recommended"}, + {"allow_experimental_variant_type", false, false, "Add new experimental Variant type"}, + {"use_variant_as_common_type", false, false, "Allow to use Variant in if/multiIf if there is no common type"}, + {"output_format_arrow_use_64_bit_indexes_for_dictionary", false, false, "Allow to use 64 bit indexes type in Arrow dictionaries"}, + {"parallel_replicas_mark_segment_size", 128, 128, "Add new setting to control segment size in new parallel replicas coordinator implementation"}, + {"ignore_materialized_views_with_dropped_target_table", false, false, "Add new setting to allow to ignore materialized views with dropped target table"}, + {"output_format_compression_level", 3, 3, "Allow to change compression level in the query output"}, + {"output_format_compression_zstd_window_log", 0, 0, "Allow to change zstd window log in the query output when zstd compression is used"}, + {"enable_zstd_qat_codec", false, false, "Add new ZSTD_QAT codec"}, + {"enable_vertical_final", false, true, "Use vertical final by default"}, + {"output_format_arrow_use_64_bit_indexes_for_dictionary", false, false, "Allow to use 64 bit indexes type in Arrow dictionaries"}, + {"max_rows_in_set_to_optimize_join", 100000, 0, "Disable join optimization as it prevents from read in order optimization"}, + {"output_format_pretty_color", true, "auto", "Setting is changed to allow also for auto value, disabling ANSI escapes if output is not a tty"}, + {"function_visible_width_behavior", 0, 1, "We changed the default behavior of `visibleWidth` to be more precise"}, + {"max_estimated_execution_time", 0, 0, "Separate max_execution_time and max_estimated_execution_time"}, + {"iceberg_engine_ignore_schema_evolution", false, false, "Allow to ignore schema evolution in Iceberg table engine"}, + {"optimize_injective_functions_in_group_by", false, true, "Replace injective functions by it's arguments in GROUP BY section in analyzer"}, + {"update_insert_deduplication_token_in_dependent_materialized_views", false, false, "Allow to update insert deduplication token with table identifier during insert in dependent materialized views"}, + {"azure_max_unexpected_write_error_retries", 4, 4, "The maximum number of retries in case of unexpected errors during Azure blob storage write"}, + {"split_parts_ranges_into_intersecting_and_non_intersecting_final", false, true, "Allow to split parts ranges into intersecting and non intersecting during FINAL 
optimization"}, + {"split_intersecting_parts_ranges_into_layers_final", true, true, "Allow to split intersecting parts ranges into layers during FINAL optimization"} + } + }, + {"23.12", + { + {"allow_suspicious_ttl_expressions", true, false, "It is a new setting, and in previous versions the behavior was equivalent to allowing."}, + {"input_format_parquet_allow_missing_columns", false, true, "Allow missing columns in Parquet files by default"}, + {"input_format_orc_allow_missing_columns", false, true, "Allow missing columns in ORC files by default"}, + {"input_format_arrow_allow_missing_columns", false, true, "Allow missing columns in Arrow files by default"} + } + }, + {"23.11", + { + {"parsedatetime_parse_without_leading_zeros", false, true, "Improved compatibility with MySQL DATE_FORMAT/STR_TO_DATE"} + } + }, + {"23.9", + { + {"optimize_group_by_constant_keys", false, true, "Optimize group by constant keys by default"}, + {"input_format_json_try_infer_named_tuples_from_objects", false, true, "Try to infer named Tuples from JSON objects by default"}, + {"input_format_json_read_numbers_as_strings", false, true, "Allow to read numbers as strings in JSON formats by default"}, + {"input_format_json_read_arrays_as_strings", false, true, "Allow to read arrays as strings in JSON formats by default"}, + {"input_format_json_infer_incomplete_types_as_strings", false, true, "Allow to infer incomplete types as Strings in JSON formats by default"}, + {"input_format_json_try_infer_numbers_from_strings", true, false, "Don't infer numbers from strings in JSON formats by default to prevent possible parsing errors"}, + {"http_write_exception_in_output_format", false, true, "Output valid JSON/XML on exception in HTTP streaming."} + } + }, + {"23.8", + { + {"rewrite_count_distinct_if_with_count_distinct_implementation", false, true, "Rewrite countDistinctIf with count_distinct_implementation configuration"} + } + }, + {"23.7", + { + {"function_sleep_max_microseconds_per_block", 0, 3000000, "In previous versions, the maximum sleep time of 3 seconds was applied only for `sleep`, but not for `sleepEachRow` function. In the new version, we introduce this setting. If you set compatibility with the previous versions, we will disable the limit altogether."} + } + }, + {"23.6", + { + {"http_send_timeout", 180, 30, "3 minutes seems crazy long. Note that this is timeout for a single network write call, not for the whole upload operation."}, + {"http_receive_timeout", 180, 30, "See http_send_timeout."} + } + }, + {"23.5", + { + {"input_format_parquet_preserve_order", true, false, "Allow Parquet reader to reorder rows for better parallelism."}, + {"parallelize_output_from_storages", false, true, "Allow parallelism when executing queries that read from file/url/s3/etc. This may reorder rows."}, + {"use_with_fill_by_sorting_prefix", false, true, "Columns preceding WITH FILL columns in ORDER BY clause form sorting prefix. 
Rows with different values in sorting prefix are filled independently"}, + {"output_format_parquet_compliant_nested_types", false, true, "Change an internal field name in output Parquet file schema."} + } + }, + {"23.4", + { + {"allow_suspicious_indices", true, false, "If true, index can be defined with identical expressions"}, + {"allow_nonconst_timezone_arguments", true, false, "Allow non-const timezone arguments in certain time-related functions like toTimeZone(), fromUnixTimestamp*(), snowflakeToDateTime*()."}, + {"connect_timeout_with_failover_ms", 50, 1000, "Increase default connect timeout because of async connect"}, + {"connect_timeout_with_failover_secure_ms", 100, 1000, "Increase default secure connect timeout because of async connect"}, + {"hedged_connection_timeout_ms", 100, 50, "Start new connection in hedged requests after 50 ms instead of 100 to correspond with previous connect timeout"}, + {"formatdatetime_f_prints_single_zero", true, false, "Improved compatibility with MySQL DATE_FORMAT()/STR_TO_DATE()"}, + {"formatdatetime_parsedatetime_m_is_month_name", false, true, "Improved compatibility with MySQL DATE_FORMAT/STR_TO_DATE"} + } + }, + {"23.3", + { + {"output_format_parquet_version", "1.0", "2.latest", "Use latest Parquet format version for output format"}, + {"input_format_json_ignore_unknown_keys_in_named_tuple", false, true, "Improve parsing JSON objects as named tuples"}, + {"input_format_native_allow_types_conversion", false, true, "Allow types conversion in Native input format"}, + {"output_format_arrow_compression_method", "none", "lz4_frame", "Use lz4 compression in Arrow output format by default"}, + {"output_format_parquet_compression_method", "snappy", "lz4", "Use lz4 compression in Parquet output format by default"}, + {"output_format_orc_compression_method", "none", "lz4_frame", "Use lz4 compression in ORC output format by default"}, + {"async_query_sending_for_remote", false, true, "Create connections and send query async across shards"} + } + }, + {"23.2", + { + {"output_format_parquet_fixed_string_as_fixed_byte_array", false, true, "Use Parquet FIXED_LENGTH_BYTE_ARRAY type for FixedString by default"}, + {"output_format_arrow_fixed_string_as_fixed_byte_array", false, true, "Use Arrow FIXED_SIZE_BINARY type for FixedString by default"}, + {"query_plan_remove_redundant_distinct", false, true, "Remove redundant Distinct step in query plan"}, + {"optimize_duplicate_order_by_and_distinct", true, false, "Remove duplicate ORDER BY and DISTINCT if it's possible"}, + {"insert_keeper_max_retries", 0, 20, "Enable reconnections to Keeper on INSERT, improve reliability"} + } + }, + {"23.1", + { + {"input_format_json_read_objects_as_strings", 0, 1, "Enable reading nested json objects as strings while object type is experimental"}, + {"input_format_json_defaults_for_missing_elements_in_named_tuple", false, true, "Allow missing elements in JSON objects while reading named tuples by default"}, + {"input_format_csv_detect_header", false, true, "Detect header in CSV format by default"}, + {"input_format_tsv_detect_header", false, true, "Detect header in TSV format by default"}, + {"input_format_custom_detect_header", false, true, "Detect header in CustomSeparated format by default"}, + {"query_plan_remove_redundant_sorting", false, true, "Remove redundant sorting in query plan. 
For example, sorting steps related to ORDER BY clauses in subqueries"} + } + }, + {"22.12", + { + {"max_size_to_preallocate_for_aggregation", 10'000'000, 100'000'000, "This optimizes performance"}, + {"query_plan_aggregation_in_order", 0, 1, "Enable some refactoring around query plan"}, + {"format_binary_max_string_size", 0, 1_GiB, "Prevent allocating large amount of memory"} + } + }, + {"22.11", + { + {"use_structure_from_insertion_table_in_table_functions", 0, 2, "Improve using structure from insertion table in table functions"} + } + }, + {"22.9", + { + {"force_grouping_standard_compatibility", false, true, "Make GROUPING function output the same as in SQL standard and other DBMS"} + } + }, + {"22.7", + { + {"cross_to_inner_join_rewrite", 1, 2, "Force rewrite comma join to inner"}, + {"enable_positional_arguments", false, true, "Enable positional arguments feature by default"}, + {"format_csv_allow_single_quotes", true, false, "Most tools don't treat single quote in CSV specially, don't do it by default too"} + } + }, + {"22.6", + { + {"output_format_json_named_tuples_as_objects", false, true, "Allow to serialize named tuples as JSON objects in JSON formats by default"}, + {"input_format_skip_unknown_fields", false, true, "Optimize reading subset of columns for some input formats"} + } + }, + {"22.5", + { + {"memory_overcommit_ratio_denominator", 0, 1073741824, "Enable memory overcommit feature by default"}, + {"memory_overcommit_ratio_denominator_for_user", 0, 1073741824, "Enable memory overcommit feature by default"} + } + }, + {"22.4", + { + {"allow_settings_after_format_in_insert", true, false, "Do not allow SETTINGS after FORMAT for INSERT queries because ClickHouse interpret SETTINGS as some values, which is misleading"} + } + }, + {"22.3", + { + {"cast_ipv4_ipv6_default_on_conversion_error", true, false, "Make functions cast(value, 'IPv4') and cast(value, 'IPv6') behave same as toIPv4 and toIPv6 functions"} + } + }, + {"21.12", + { + {"stream_like_engine_allow_direct_select", true, false, "Do not allow direct select for Kafka/RabbitMQ/FileLog by default"} + } + }, + {"21.9", + { + {"output_format_decimal_trailing_zeros", true, false, "Do not output trailing zeros in text representation of Decimal types by default for better looking output"}, + {"use_hedged_requests", false, true, "Enable Hedged Requests feature by default"} + } + }, + {"21.7", + { + {"legacy_column_name_of_tuple_literal", true, false, "Add this setting only for compatibility reasons. It makes sense to set to 'true', while doing rolling update of cluster from version lower than 21.7 to higher"} + } + }, + {"21.5", + { + {"async_socket_for_remote", false, true, "Fix all problems and turn on asynchronous reads from socket for remote queries by default again"} + } + }, + {"21.3", + { + {"async_socket_for_remote", true, false, "Turn off asynchronous reads from socket for remote queries because of some problems"}, + {"optimize_normalize_count_variants", false, true, "Rewrite aggregate functions that semantically equals to count() as count() by default"}, + {"normalize_function_names", false, true, "Normalize function names to their canonical names, this was needed for projection query routing"} + } + }, + {"21.2", + { + {"enable_global_with_statement", false, true, "Propagate WITH statements to UNION queries and all subqueries by default"} + } + }, + {"21.1", + { + {"insert_quorum_parallel", false, true, "Use parallel quorum inserts by default. 
It is significantly more convenient to use than sequential quorum inserts"}, + {"input_format_null_as_default", false, true, "Allow to insert NULL as default for input formats by default"}, + {"optimize_on_insert", false, true, "Enable data optimization on INSERT by default for better user experience"}, + {"use_compact_format_in_distributed_parts_names", false, true, "Use compact format for async INSERT into Distributed tables by default"} + } + }, + {"20.10", + { + {"format_regexp_escaping_rule", "Escaped", "Raw", "Use Raw as default escaping rule for Regexp format to male the behaviour more like to what users expect"} + } + }, + {"20.7", + { + {"show_table_uuid_in_table_create_query_if_not_nil", true, false, "Stop showing UID of the table in its CREATE query for Engine=Atomic"} + } + }, + {"20.5", + { + {"input_format_with_names_use_header", false, true, "Enable using header with names for formats with WithNames/WithNamesAndTypes suffixes"}, + {"allow_suspicious_codecs", true, false, "Don't allow to specify meaningless compression codecs"} + } + }, + {"20.4", + { + {"validate_polygons", false, true, "Throw exception if polygon is invalid in function pointInPolygon by default instead of returning possibly wrong results"} + } + }, + {"19.18", + { + {"enable_scalar_subquery_optimization", false, true, "Prevent scalar subqueries from (de)serializing large scalar values and possibly avoid running the same subquery more than once"} + } + }, + {"19.14", + { + {"any_join_distinct_right_table_keys", true, false, "Disable ANY RIGHT and ANY FULL JOINs by default to avoid inconsistency"} + } + }, + {"19.12", + { + {"input_format_defaults_for_omitted_fields", false, true, "Enable calculation of complex default expressions for omitted fields for some input formats, because it should be the expected behaviour"} + } + }, + {"19.5", + { + {"max_partitions_per_insert_block", 0, 100, "Add a limit for the number of partitions in one block"} + } + }, + {"18.12.17", + { + {"enable_optimize_predicate_expression", 0, 1, "Optimize predicates to subqueries by default"} + } + }, }; From 31c142a96d49fbe1b46b21e4cdad366546dc7864 Mon Sep 17 00:00:00 2001 From: Michael Stetsyuk Date: Wed, 31 Jul 2024 14:44:54 +0100 Subject: [PATCH 500/661] make it possible to rerun test_storage_delta and test_checking_s3_blobs_paranoid --- .../test_checking_s3_blobs_paranoid/test.py | 2 ++ tests/integration/test_storage_delta/test.py | 29 +++++++++++++++++++ 2 files changed, 31 insertions(+) diff --git a/tests/integration/test_checking_s3_blobs_paranoid/test.py b/tests/integration/test_checking_s3_blobs_paranoid/test.py index dde636b5d29..afe8449b44a 100644 --- a/tests/integration/test_checking_s3_blobs_paranoid/test.py +++ b/tests/integration/test_checking_s3_blobs_paranoid/test.py @@ -61,6 +61,7 @@ def test_upload_after_check_works(cluster, broken_s3): node.query( """ + DROP TABLE IF EXISTS s3_upload_after_check_works; CREATE TABLE s3_upload_after_check_works ( id Int64, data String @@ -631,6 +632,7 @@ def test_no_key_found_disk(cluster, broken_s3): node.query( """ + DROP TABLE IF EXISTS no_key_found_disk; CREATE TABLE no_key_found_disk ( id Int64 ) ENGINE=MergeTree() diff --git a/tests/integration/test_storage_delta/test.py b/tests/integration/test_storage_delta/test.py index 67cc7cdd6da..698becc18c4 100644 --- a/tests/integration/test_storage_delta/test.py +++ b/tests/integration/test_storage_delta/test.py @@ -52,6 +52,11 @@ def get_spark(): return builder.master("local").getOrCreate() +def 
remove_local_directory_contents(local_path): + for local_file in glob.glob(local_path + "/**"): + os.unlink(local_file) + + @pytest.fixture(scope="module") def started_cluster(): try: @@ -169,6 +174,9 @@ def test_single_log_file(started_cluster): inserted_data ) + os.unlink(parquet_data_path) + remove_local_directory_contents(f"/{TABLE_NAME}") + def test_partition_by(started_cluster): instance = started_cluster.instances["node1"] @@ -191,6 +199,7 @@ def test_partition_by(started_cluster): create_delta_table(instance, TABLE_NAME) assert int(instance.query(f"SELECT count() FROM {TABLE_NAME}")) == 10 + remove_local_directory_contents(f"/{TABLE_NAME}") def test_checkpoint(started_cluster): instance = started_cluster.instances["node1"] @@ -266,6 +275,9 @@ def test_checkpoint(started_cluster): ).strip() ) + remove_local_directory_contents(f"/{TABLE_NAME}") + spark.sql(f"DROP TABLE {TABLE_NAME}") + def test_multiple_log_files(started_cluster): instance = started_cluster.instances["node1"] @@ -304,6 +316,8 @@ def test_multiple_log_files(started_cluster): "SELECT number, toString(number + 1) FROM numbers(200)" ) + remove_local_directory_contents(f"/{TABLE_NAME}") + def test_metadata(started_cluster): instance = started_cluster.instances["node1"] @@ -337,6 +351,9 @@ def test_metadata(started_cluster): create_delta_table(instance, TABLE_NAME) assert int(instance.query(f"SELECT count() FROM {TABLE_NAME}")) == 100 + os.unlink(parquet_data_path) + remove_local_directory_contents(f"/{TABLE_NAME}") + def test_types(started_cluster): TABLE_NAME = "test_types" @@ -409,6 +426,9 @@ def test_types(started_cluster): ] ) + remove_local_directory_contents(f"/{result_file}") + spark.sql(f"DROP TABLE {TABLE_NAME}") + def test_restart_broken(started_cluster): instance = started_cluster.instances["node1"] @@ -470,6 +490,9 @@ def test_restart_broken(started_cluster): assert int(instance.query(f"SELECT count() FROM {TABLE_NAME}")) == 100 + os.unlink(parquet_data_path) + remove_local_directory_contents(f"/{TABLE_NAME}") + def test_restart_broken_table_function(started_cluster): instance = started_cluster.instances["node1"] @@ -524,6 +547,9 @@ def test_restart_broken_table_function(started_cluster): assert int(instance.query(f"SELECT count() FROM {TABLE_NAME}")) == 100 + os.unlink(parquet_data_path) + remove_local_directory_contents(f"/{TABLE_NAME}") + def test_partition_columns(started_cluster): instance = started_cluster.instances["node1"] @@ -721,3 +747,6 @@ SELECT * FROM deltaLake('http://{started_cluster.minio_ip}:{started_cluster.mini ) == 1 ) + + remove_local_directory_contents(f"/{TABLE_NAME}") + spark.sql(f"DROP TABLE {TABLE_NAME}") From 2a2dba63cc0182247754a5a4819cb89f21825bfd Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Wed, 31 Jul 2024 13:48:04 +0000 Subject: [PATCH 501/661] Automatic style fix --- tests/integration/helpers/cluster.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index 7f0a9154be9..2e38aec3512 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -2971,6 +2971,7 @@ class ClickHouseCluster: "Trying to create Azurite instance by command %s", " ".join(map(str, azurite_start_cmd)), ) + def logging_azurite_initialization(exception, retry_number, sleep_time): logging.info( f"Azurite initialization failed with error: {exception}" From 7dbd3d75340522195e7d08a725cf5ae116288c8e Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Wed, 31 Jul 2024 13:51:38 +0000 Subject: 
[PATCH 502/661] Automatic style fix --- tests/integration/test_storage_delta/test.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/integration/test_storage_delta/test.py b/tests/integration/test_storage_delta/test.py index 698becc18c4..e485bc90ee0 100644 --- a/tests/integration/test_storage_delta/test.py +++ b/tests/integration/test_storage_delta/test.py @@ -201,6 +201,7 @@ def test_partition_by(started_cluster): remove_local_directory_contents(f"/{TABLE_NAME}") + def test_checkpoint(started_cluster): instance = started_cluster.instances["node1"] spark = started_cluster.spark_session From 27f4f468b976e445e8b0dbc198ea9f0a9c62855b Mon Sep 17 00:00:00 2001 From: Michael Stetsyuk Date: Wed, 31 Jul 2024 14:55:00 +0100 Subject: [PATCH 503/661] make it possible to rerun test_recovery_time_metric multiple times --- tests/integration/test_recovery_time_metric/test.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tests/integration/test_recovery_time_metric/test.py b/tests/integration/test_recovery_time_metric/test.py index 628f2e744e0..6fcf2fad423 100644 --- a/tests/integration/test_recovery_time_metric/test.py +++ b/tests/integration/test_recovery_time_metric/test.py @@ -21,6 +21,7 @@ def start_cluster(): def test_recovery_time_metric(start_cluster): node.query( """ + DROP DATABASE IF EXISTS rdb; CREATE DATABASE rdb ENGINE = Replicated('/test/test_recovery_time_metric', 'shard1', 'replica1') """ @@ -28,6 +29,7 @@ def test_recovery_time_metric(start_cluster): node.query( """ + DROP TABLE IF EXISTS rdb.t; CREATE TABLE rdb.t ( `x` UInt32 @@ -51,3 +53,9 @@ def test_recovery_time_metric(start_cluster): ).strip() ) assert ret > 0 + + node.query( + """ + DROP DATABASE rdb + """ + ) From d6de2be4395e1bcc62ab32ad1d5b02e9db080303 Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Wed, 31 Jul 2024 16:08:18 +0200 Subject: [PATCH 504/661] Fix build --- src/Planner/findParallelReplicasQuery.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Planner/findParallelReplicasQuery.cpp b/src/Planner/findParallelReplicasQuery.cpp index 1140f30ad9c..39edb1e6516 100644 --- a/src/Planner/findParallelReplicasQuery.cpp +++ b/src/Planner/findParallelReplicasQuery.cpp @@ -144,7 +144,7 @@ public: std::unordered_map replacement_map; }; -QueryTreeNodePtr replaceTablesWithDummyTables(const QueryTreeNodePtr & query, const ContextPtr & context) +QueryTreeNodePtr replaceTablesWithDummyTables(QueryTreeNodePtr query, const ContextPtr & context) { ReplaceTableNodeToDummyVisitor visitor(context); visitor.visit(query); From 9ffbd8f5073e180592a494742d1dc3af4427b55f Mon Sep 17 00:00:00 2001 From: divanik Date: Wed, 31 Jul 2024 14:13:43 +0000 Subject: [PATCH 505/661] Possible fix --- .../03164_s3_settings_for_queries_and_merges.sql | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/queries/0_stateless/03164_s3_settings_for_queries_and_merges.sql b/tests/queries/0_stateless/03164_s3_settings_for_queries_and_merges.sql index ac2070fbd76..e43c9ae7717 100644 --- a/tests/queries/0_stateless/03164_s3_settings_for_queries_and_merges.sql +++ b/tests/queries/0_stateless/03164_s3_settings_for_queries_and_merges.sql @@ -22,18 +22,18 @@ SELECT count() FROM t_compact_bytes_s3 WHERE NOT ignore(c2, c4); SYSTEM FLUSH LOGS; SELECT - ProfileEvents['S3ReadRequestsCount'], + ProfileEvents['S3ReadRequestsCount'] - ProfileEvents['S3ReadRequestsError'], ProfileEvents['ReadBufferFromS3Bytes'] < ProfileEvents['ReadCompressedBytes'] * 1.1 FROM system.query_log -WHERE event_date >= yesterday() AND 
type = 'QueryFinish' +WHERE type = 'QueryFinish' AND current_database = currentDatabase() AND query ilike '%INSERT INTO t_compact_bytes_s3 SELECT number, number, number%'; SELECT - ProfileEvents['S3ReadRequestsCount'], + ProfileEvents['S3ReadRequestsCount'] - ProfileEvents['S3ReadRequestsError'], ProfileEvents['ReadBufferFromS3Bytes'] < ProfileEvents['ReadCompressedBytes'] * 1.1 FROM system.query_log -WHERE event_date >= yesterday() AND type = 'QueryFinish' +WHERE type = 'QueryFinish' AND current_database = currentDatabase() AND query ilike '%OPTIMIZE TABLE t_compact_bytes_s3 FINAL%'; From 67f4792b77f2a2cf0de21ead6e95c3635d26aa88 Mon Sep 17 00:00:00 2001 From: divanik  Date: Wed, 31 Jul 2024 14:16:40 +0000 Subject: [PATCH 506/661] Style check --- tests/integration/helpers/cluster.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index 7f0a9154be9..2e38aec3512 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -2971,6 +2971,7 @@ class ClickHouseCluster: "Trying to create Azurite instance by command %s", " ".join(map(str, azurite_start_cmd)), ) + def logging_azurite_initialization(exception, retry_number, sleep_time): logging.info( f"Azurite initialization failed with error: {exception}" From 1f1f0528ce3a1fb20ceee5513523787a14718b80 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Wed, 31 Jul 2024 14:32:07 +0000 Subject: [PATCH 507/661] Prefer constant to INPUT in PlannerActionsVisitor. --- src/Planner/PlannerActionsVisitor.cpp | 11 +++++++- ...lyzer_materialized_constants_bug.reference | 3 +++ ...15_analyzer_materialized_constants_bug.sql | 26 +++++++++++++++++++ 3 files changed, 39 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/03215_analyzer_materialized_constants_bug.reference create mode 100644 tests/queries/0_stateless/03215_analyzer_materialized_constants_bug.sql diff --git a/src/Planner/PlannerActionsVisitor.cpp b/src/Planner/PlannerActionsVisitor.cpp index 1960855792c..57457493844 100644 --- a/src/Planner/PlannerActionsVisitor.cpp +++ b/src/Planner/PlannerActionsVisitor.cpp @@ -491,7 +491,16 @@ public: { auto it = node_name_to_node.find(node_name); if (it != node_name_to_node.end()) - return it->second; + { + /// It is possible that ActionsDAG already has an input with the same name as constant. + /// In this case, prefer constant to input. + /// Constants affect function return type, which should be consistent with QueryTree. 
+ /// Query example: + /// SELECT materialize(toLowCardinality('b')) || 'a' FROM remote('127.0.0.{1,2}', system, one) GROUP BY 'a' + bool materialized_input = it->second->type == ActionsDAG::ActionType::INPUT && !it->second->column; + if (!materialized_input) + return it->second; + } const auto * node = &actions_dag.addColumn(column); node_name_to_node[node->result_name] = node; diff --git a/tests/queries/0_stateless/03215_analyzer_materialized_constants_bug.reference b/tests/queries/0_stateless/03215_analyzer_materialized_constants_bug.reference new file mode 100644 index 00000000000..584e34c0cde --- /dev/null +++ b/tests/queries/0_stateless/03215_analyzer_materialized_constants_bug.reference @@ -0,0 +1,3 @@ +ba +\N +1 111111111111111111111111111111111111111 diff --git a/tests/queries/0_stateless/03215_analyzer_materialized_constants_bug.sql b/tests/queries/0_stateless/03215_analyzer_materialized_constants_bug.sql new file mode 100644 index 00000000000..f9ec28d09d8 --- /dev/null +++ b/tests/queries/0_stateless/03215_analyzer_materialized_constants_bug.sql @@ -0,0 +1,26 @@ +SET allow_experimental_analyzer = 1; + +SELECT concat(materialize(toLowCardinality('b')), 'a') FROM remote('127.0.0.{1,2}', system, one) GROUP BY 'a'; + +SELECT concat(NULLIF(1, materialize(toLowCardinality(1))), concat(NULLIF(1, 1))) FROM remote('127.0.0.{1,2}', system, one) GROUP BY concat(NULLIF(1, 1)); + +DROP TABLE IF EXISTS test__fuzz_21; +CREATE TABLE test__fuzz_21 +( + `x` Decimal(18, 10) +) +ENGINE = MergeTree +ORDER BY x; + +INSERT INTO test__fuzz_21 VALUES (1), (2), (3); + +WITH ( + SELECT CAST(toFixedString(toFixedString(materialize(toFixedString('111111111111111111111111111111111111111', 39)), 39), 39), 'UInt128') + ) AS v +SELECT + coalesce(materialize(toLowCardinality(toNullable(1))), 10, NULL), + max(v) +FROM remote('127.0.0.{1,2}', default, test__fuzz_21) +GROUP BY + coalesce(NULL), + coalesce(1, 10, 10, materialize(NULL)); From e31569a065d4c81cdea671727c39983d7f3a84e5 Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Wed, 31 Jul 2024 16:32:37 +0200 Subject: [PATCH 508/661] Expect an unknown cluster --- .../0_stateless/03215_analyzer_replace_with_dummy_tables.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/03215_analyzer_replace_with_dummy_tables.sql b/tests/queries/0_stateless/03215_analyzer_replace_with_dummy_tables.sql index 12d2bd627a7..6d084c2ac50 100644 --- a/tests/queries/0_stateless/03215_analyzer_replace_with_dummy_tables.sql +++ b/tests/queries/0_stateless/03215_analyzer_replace_with_dummy_tables.sql @@ -12,4 +12,4 @@ FROM ) FROM t ) -SETTINGS allow_experimental_parallel_reading_from_replicas = 1, max_parallel_replicas = 2, allow_experimental_analyzer = 1; +SETTINGS allow_experimental_parallel_reading_from_replicas = 1, max_parallel_replicas = 2, allow_experimental_analyzer = 1; -- { serverError CLUSTER_DOESNT_EXIST } From 8c36fbf4eddeba9282b53f726976b55f62d3ee19 Mon Sep 17 00:00:00 2001 From: divanik Date: Wed, 31 Jul 2024 14:42:38 +0000 Subject: [PATCH 509/661] Remove unnecessary change --- access/quotas.list | Bin 0 -> 1 bytes access/roles.list | Bin 0 -> 1 bytes access/row_policies.list | Bin 0 -> 1 bytes access/settings_profiles.list | Bin 0 -> 1 bytes access/users.list | Bin 0 -> 1 bytes .../03164_s3_settings_for_queries_and_merges.sql | 4 ++-- 6 files changed, 2 insertions(+), 2 deletions(-) create mode 100644 access/quotas.list create mode 100644 access/roles.list create mode 100644 access/row_policies.list create mode 100644 
access/settings_profiles.list create mode 100644 access/users.list diff --git a/access/quotas.list b/access/quotas.list new file mode 100644 index 0000000000000000000000000000000000000000..f76dd238ade08917e6712764a16a22005a50573d GIT binary patch literal 1 IcmZPo000310RR91 literal 0 HcmV?d00001 diff --git a/access/roles.list b/access/roles.list new file mode 100644 index 0000000000000000000000000000000000000000..f76dd238ade08917e6712764a16a22005a50573d GIT binary patch literal 1 IcmZPo000310RR91 literal 0 HcmV?d00001 diff --git a/access/row_policies.list b/access/row_policies.list new file mode 100644 index 0000000000000000000000000000000000000000..f76dd238ade08917e6712764a16a22005a50573d GIT binary patch literal 1 IcmZPo000310RR91 literal 0 HcmV?d00001 diff --git a/access/settings_profiles.list b/access/settings_profiles.list new file mode 100644 index 0000000000000000000000000000000000000000..f76dd238ade08917e6712764a16a22005a50573d GIT binary patch literal 1 IcmZPo000310RR91 literal 0 HcmV?d00001 diff --git a/access/users.list b/access/users.list new file mode 100644 index 0000000000000000000000000000000000000000..f76dd238ade08917e6712764a16a22005a50573d GIT binary patch literal 1 IcmZPo000310RR91 literal 0 HcmV?d00001 diff --git a/tests/queries/0_stateless/03164_s3_settings_for_queries_and_merges.sql b/tests/queries/0_stateless/03164_s3_settings_for_queries_and_merges.sql index e43c9ae7717..94e390537df 100644 --- a/tests/queries/0_stateless/03164_s3_settings_for_queries_and_merges.sql +++ b/tests/queries/0_stateless/03164_s3_settings_for_queries_and_merges.sql @@ -25,7 +25,7 @@ SELECT ProfileEvents['S3ReadRequestsCount'] - ProfileEvents['S3ReadRequestsError'], ProfileEvents['ReadBufferFromS3Bytes'] < ProfileEvents['ReadCompressedBytes'] * 1.1 FROM system.query_log -WHERE type = 'QueryFinish' +WHERE event_date >= yesterday() AND type = 'QueryFinish' AND current_database = currentDatabase() AND query ilike '%INSERT INTO t_compact_bytes_s3 SELECT number, number, number%'; @@ -33,7 +33,7 @@ SELECT ProfileEvents['S3ReadRequestsCount'] - ProfileEvents['S3ReadRequestsError'], ProfileEvents['ReadBufferFromS3Bytes'] < ProfileEvents['ReadCompressedBytes'] * 1.1 FROM system.query_log -WHERE type = 'QueryFinish' +WHERE event_date >= yesterday() AND type = 'QueryFinish' AND current_database = currentDatabase() AND query ilike '%OPTIMIZE TABLE t_compact_bytes_s3 FINAL%'; From 20ec27f9dc79d7ee81cd06f1587de83c8ce81441 Mon Sep 17 00:00:00 2001 From: divanik Date: Wed, 31 Jul 2024 14:46:53 +0000 Subject: [PATCH 510/661] Remove trach dir --- access/quotas.list | Bin 1 -> 0 bytes access/roles.list | Bin 1 -> 0 bytes access/row_policies.list | Bin 1 -> 0 bytes access/settings_profiles.list | Bin 1 -> 0 bytes access/users.list | Bin 1 -> 0 bytes 5 files changed, 0 insertions(+), 0 deletions(-) delete mode 100644 access/quotas.list delete mode 100644 access/roles.list delete mode 100644 access/row_policies.list delete mode 100644 access/settings_profiles.list delete mode 100644 access/users.list diff --git a/access/quotas.list b/access/quotas.list deleted file mode 100644 index f76dd238ade08917e6712764a16a22005a50573d..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 1 IcmZPo000310RR91 diff --git a/access/roles.list b/access/roles.list deleted file mode 100644 index f76dd238ade08917e6712764a16a22005a50573d..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 1 IcmZPo000310RR91 diff --git a/access/row_policies.list 
b/access/row_policies.list deleted file mode 100644 index f76dd238ade08917e6712764a16a22005a50573d..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 1 IcmZPo000310RR91 diff --git a/access/settings_profiles.list b/access/settings_profiles.list deleted file mode 100644 index f76dd238ade08917e6712764a16a22005a50573d..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 1 IcmZPo000310RR91 diff --git a/access/users.list b/access/users.list deleted file mode 100644 index f76dd238ade08917e6712764a16a22005a50573d..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 1 IcmZPo000310RR91 From 650737890299f8cad2c77ad46022ee0a37b284eb Mon Sep 17 00:00:00 2001 From: divanik Date: Wed, 31 Jul 2024 14:49:30 +0000 Subject: [PATCH 511/661] Fix erroe with profile event name --- .../0_stateless/03164_s3_settings_for_queries_and_merges.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/03164_s3_settings_for_queries_and_merges.sql b/tests/queries/0_stateless/03164_s3_settings_for_queries_and_merges.sql index 94e390537df..001ef382850 100644 --- a/tests/queries/0_stateless/03164_s3_settings_for_queries_and_merges.sql +++ b/tests/queries/0_stateless/03164_s3_settings_for_queries_and_merges.sql @@ -22,7 +22,7 @@ SELECT count() FROM t_compact_bytes_s3 WHERE NOT ignore(c2, c4); SYSTEM FLUSH LOGS; SELECT - ProfileEvents['S3ReadRequestsCount'] - ProfileEvents['S3ReadRequestsError'], + ProfileEvents['S3ReadRequestsCount'] - ProfileEvents['S3ReadRequestsErrors'], ProfileEvents['ReadBufferFromS3Bytes'] < ProfileEvents['ReadCompressedBytes'] * 1.1 FROM system.query_log WHERE event_date >= yesterday() AND type = 'QueryFinish' @@ -30,7 +30,7 @@ WHERE event_date >= yesterday() AND type = 'QueryFinish' AND query ilike '%INSERT INTO t_compact_bytes_s3 SELECT number, number, number%'; SELECT - ProfileEvents['S3ReadRequestsCount'] - ProfileEvents['S3ReadRequestsError'], + ProfileEvents['S3ReadRequestsCount'] - ProfileEvents['S3ReadRequestsErrors'], ProfileEvents['ReadBufferFromS3Bytes'] < ProfileEvents['ReadCompressedBytes'] * 1.1 FROM system.query_log WHERE event_date >= yesterday() AND type = 'QueryFinish' From c3c653e7692a755c3467b77e866555734d50ef50 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Wed, 31 Jul 2024 14:58:52 +0000 Subject: [PATCH 512/661] Better --- src/Interpreters/DatabaseCatalog.cpp | 2 ++ .../config.d/database_catalog_drop_table_concurrency.xml | 3 +++ tests/config/install.sh | 1 + 3 files changed, 6 insertions(+) create mode 100644 tests/config/config.d/database_catalog_drop_table_concurrency.xml diff --git a/src/Interpreters/DatabaseCatalog.cpp b/src/Interpreters/DatabaseCatalog.cpp index a8e5fd7e6aa..48b01a9df43 100644 --- a/src/Interpreters/DatabaseCatalog.cpp +++ b/src/Interpreters/DatabaseCatalog.cpp @@ -1406,6 +1406,8 @@ void DatabaseCatalog::waitTableFinallyDropped(const UUID & uuid) return !tables_marked_dropped_ids.contains(uuid) || is_shutting_down; }); + LOG_DEBUG(log, "Done waiting for the table {} to be dropped. The outcome: {}", toString(uuid), tables_marked_dropped_ids).contains(uuid) ? 
"table still exists" : "table dropped successfully"); + /// TSA doesn't support unique_lock if (TSA_SUPPRESS_WARNING_FOR_READ(tables_marked_dropped_ids).contains(uuid)) throw Exception(ErrorCodes::UNFINISHED, "Did not finish dropping the table with UUID {} because the server is shutting down, " diff --git a/tests/config/config.d/database_catalog_drop_table_concurrency.xml b/tests/config/config.d/database_catalog_drop_table_concurrency.xml new file mode 100644 index 00000000000..ac118625f4e --- /dev/null +++ b/tests/config/config.d/database_catalog_drop_table_concurrency.xml @@ -0,0 +1,3 @@ + + 256 + diff --git a/tests/config/install.sh b/tests/config/install.sh index 1b0edc5fc16..7c4b36dc4bd 100755 --- a/tests/config/install.sh +++ b/tests/config/install.sh @@ -21,6 +21,7 @@ ln -sf $SRC_PATH/config.d/listen.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/text_log.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/blob_storage_log.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/custom_settings_prefixes.xml $DEST_SERVER_PATH/config.d/ +ln -sf $SRC_PATH/config.d/database_catalog_drop_table_concurrency.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/enable_access_control_improvements.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/macros.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/secure_ports.xml $DEST_SERVER_PATH/config.d/ From 19cd00000373a5707178214744444b4d8c4034a5 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 31 Jul 2024 17:18:55 +0200 Subject: [PATCH 513/661] Update src/Interpreters/DatabaseCatalog.cpp --- src/Interpreters/DatabaseCatalog.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/DatabaseCatalog.cpp b/src/Interpreters/DatabaseCatalog.cpp index 48b01a9df43..56d9c323d39 100644 --- a/src/Interpreters/DatabaseCatalog.cpp +++ b/src/Interpreters/DatabaseCatalog.cpp @@ -1406,7 +1406,7 @@ void DatabaseCatalog::waitTableFinallyDropped(const UUID & uuid) return !tables_marked_dropped_ids.contains(uuid) || is_shutting_down; }); - LOG_DEBUG(log, "Done waiting for the table {} to be dropped. The outcome: {}", toString(uuid), tables_marked_dropped_ids).contains(uuid) ? "table still exists" : "table dropped successfully"); + LOG_DEBUG(log, "Done waiting for the table {} to be dropped. The outcome: {}", toString(uuid), tables_marked_dropped_ids.contains(uuid) ? "table still exists" : "table dropped successfully"); /// TSA doesn't support unique_lock if (TSA_SUPPRESS_WARNING_FOR_READ(tables_marked_dropped_ids).contains(uuid)) From f032c015ca9ec10b7938bbf3d67bb6181776d24a Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Wed, 31 Jul 2024 15:40:37 +0000 Subject: [PATCH 514/661] Ignore some tests --- tests/queries/0_stateless/00705_drop_create_merge_tree.sh | 4 ++-- .../0_stateless/01019_alter_materialized_view_atomic.sh | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/queries/0_stateless/00705_drop_create_merge_tree.sh b/tests/queries/0_stateless/00705_drop_create_merge_tree.sh index fd002668696..ea8b9d02e49 100755 --- a/tests/queries/0_stateless/00705_drop_create_merge_tree.sh +++ b/tests/queries/0_stateless/00705_drop_create_merge_tree.sh @@ -5,8 +5,8 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh -yes 'CREATE TABLE IF NOT EXISTS table (x UInt8) ENGINE = MergeTree ORDER BY tuple();' | head -n 1000 | $CLICKHOUSE_CLIENT & -yes 'DROP TABLE IF EXISTS table;' | head -n 1000 | $CLICKHOUSE_CLIENT & +yes 'CREATE TABLE IF NOT EXISTS table (x UInt8) ENGINE = MergeTree ORDER BY tuple();' | head -n 1000 | $CLICKHOUSE_CLIENT --multiquery & +yes 'DROP TABLE IF EXISTS table;' | head -n 1000 | $CLICKHOUSE_CLIENT --multiquery & wait ${CLICKHOUSE_CLIENT} --query "DROP TABLE IF EXISTS table" diff --git a/tests/queries/0_stateless/01019_alter_materialized_view_atomic.sh b/tests/queries/0_stateless/01019_alter_materialized_view_atomic.sh index eb12a76eb62..4bd21fcee02 100755 --- a/tests/queries/0_stateless/01019_alter_materialized_view_atomic.sh +++ b/tests/queries/0_stateless/01019_alter_materialized_view_atomic.sh @@ -7,7 +7,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -$CLICKHOUSE_CLIENT < Date: Wed, 31 Jul 2024 15:41:37 +0000 Subject: [PATCH 515/661] Fix build --- src/Interpreters/DatabaseCatalog.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/Interpreters/DatabaseCatalog.cpp b/src/Interpreters/DatabaseCatalog.cpp index 56d9c323d39..273e5720679 100644 --- a/src/Interpreters/DatabaseCatalog.cpp +++ b/src/Interpreters/DatabaseCatalog.cpp @@ -1406,10 +1406,11 @@ void DatabaseCatalog::waitTableFinallyDropped(const UUID & uuid) return !tables_marked_dropped_ids.contains(uuid) || is_shutting_down; }); - LOG_DEBUG(log, "Done waiting for the table {} to be dropped. The outcome: {}", toString(uuid), tables_marked_dropped_ids.contains(uuid) ? "table still exists" : "table dropped successfully"); - /// TSA doesn't support unique_lock - if (TSA_SUPPRESS_WARNING_FOR_READ(tables_marked_dropped_ids).contains(uuid)) + const bool has_table = TSA_SUPPRESS_WARNING_FOR_READ(tables_marked_dropped_ids).contains(uuid); + LOG_DEBUG(log, "Done waiting for the table {} to be dropped. The outcome: {}", toString(uuid), has_table ? "table still exists" : "table dropped successfully"); + + if has_table) throw Exception(ErrorCodes::UNFINISHED, "Did not finish dropping the table with UUID {} because the server is shutting down, " "will finish after restart", uuid); } From 8afe61e04581d0b95ac2d6e927bb9d2427247c7a Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Wed, 31 Jul 2024 17:58:41 +0200 Subject: [PATCH 516/661] Better --- src/Interpreters/DatabaseCatalog.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/DatabaseCatalog.cpp b/src/Interpreters/DatabaseCatalog.cpp index 273e5720679..fb4fad85f66 100644 --- a/src/Interpreters/DatabaseCatalog.cpp +++ b/src/Interpreters/DatabaseCatalog.cpp @@ -1410,7 +1410,7 @@ void DatabaseCatalog::waitTableFinallyDropped(const UUID & uuid) const bool has_table = TSA_SUPPRESS_WARNING_FOR_READ(tables_marked_dropped_ids).contains(uuid); LOG_DEBUG(log, "Done waiting for the table {} to be dropped. The outcome: {}", toString(uuid), has_table ? 
"table still exists" : "table dropped successfully"); - if has_table) + if (has_table) throw Exception(ErrorCodes::UNFINISHED, "Did not finish dropping the table with UUID {} because the server is shutting down, " "will finish after restart", uuid); } From 743d63767a74f41b3628c52ccf166be773baecf2 Mon Sep 17 00:00:00 2001 From: serxa Date: Wed, 31 Jul 2024 17:06:49 +0000 Subject: [PATCH 517/661] fix AsyncLoader destruction race --- src/Common/AsyncLoader.cpp | 33 ++++++++++++++++++++------------- 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/src/Common/AsyncLoader.cpp b/src/Common/AsyncLoader.cpp index 6264eb03106..d40e320e741 100644 --- a/src/Common/AsyncLoader.cpp +++ b/src/Common/AsyncLoader.cpp @@ -218,20 +218,27 @@ AsyncLoader::~AsyncLoader() { // All `LoadTask` objects should be destructed before AsyncLoader destruction because they hold a reference. // To make sure we check for all pending jobs to be finished. - std::unique_lock lock{mutex}; - if (scheduled_jobs.empty() && finished_jobs.empty()) - return; + { + std::unique_lock lock{mutex}; + if (!scheduled_jobs.empty() || !finished_jobs.empty()) + { + std::vector scheduled; + std::vector finished; + scheduled.reserve(scheduled_jobs.size()); + finished.reserve(finished_jobs.size()); + for (const auto & [job, _] : scheduled_jobs) + scheduled.push_back(job->name); + for (const auto & job : finished_jobs) + finished.push_back(job->name); + LOG_ERROR(log, "Bug. Destruction with pending ({}) and finished ({}) load jobs.", fmt::join(scheduled, ", "), fmt::join(finished, ", ")); + abort(); + } + } - std::vector scheduled; - std::vector finished; - scheduled.reserve(scheduled_jobs.size()); - finished.reserve(finished_jobs.size()); - for (const auto & [job, _] : scheduled_jobs) - scheduled.push_back(job->name); - for (const auto & job : finished_jobs) - finished.push_back(job->name); - LOG_ERROR(log, "Bug. Destruction with pending ({}) and finished ({}) load jobs.", fmt::join(scheduled, ", "), fmt::join(finished, ", ")); - abort(); + // When all jobs are done we could still have finalizing workers. + // These workers could call updateCurrentPriorityAndSpawn() that scans all pools. + // We need to stop all of them before destructing any of them. 
+ stop(); } void AsyncLoader::start() From 2b79da36c0701bb9ca392fddd9129a7e0e04ef3f Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Wed, 31 Jul 2024 19:26:45 +0200 Subject: [PATCH 518/661] Update 01605_adaptive_granularity_block_borders.sql --- .../0_stateless/01605_adaptive_granularity_block_borders.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01605_adaptive_granularity_block_borders.sql b/tests/queries/0_stateless/01605_adaptive_granularity_block_borders.sql index 5f09dc423b2..f9b8bb1c1c6 100644 --- a/tests/queries/0_stateless/01605_adaptive_granularity_block_borders.sql +++ b/tests/queries/0_stateless/01605_adaptive_granularity_block_borders.sql @@ -1,4 +1,4 @@ --- Tags: no-random-merge-tree-settings, no-tsan, no-debug, no-object-storage, no-distributed-cache +-- Tags: long, no-random-merge-tree-settings, no-tsan, no-debug, no-object-storage, no-distributed-cache -- no-tsan: too slow -- no-object-storage: for remote tables we use thread pool even when reading with one stream, so memory consumption is higher From dde274f6fad979aa94ea31395b0434c81f72328a Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Wed, 31 Jul 2024 18:08:14 +0000 Subject: [PATCH 519/661] Re-enable ICU on s390/x --- contrib/icu-cmake/CMakeLists.txt | 4 +--- contrib/icudata | 2 +- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/contrib/icu-cmake/CMakeLists.txt b/contrib/icu-cmake/CMakeLists.txt index f9d05f7fe97..adeaa7dcf33 100644 --- a/contrib/icu-cmake/CMakeLists.txt +++ b/contrib/icu-cmake/CMakeLists.txt @@ -4,9 +4,7 @@ else () option(ENABLE_ICU "Enable ICU" 0) endif () -# Temporarily disabled s390x because the ICU build links a blob (icudt71b_dat.S) and our friends from IBM did not explain how they generated -# the blob on s390x: https://github.com/ClickHouse/icudata/pull/2#issuecomment-2226957255 -if (NOT ENABLE_ICU OR ARCH_S390X) +if (NOT ENABLE_ICU) message(STATUS "Not using ICU") return() endif() diff --git a/contrib/icudata b/contrib/icudata index d345d6ac22f..4904951339a 160000 --- a/contrib/icudata +++ b/contrib/icudata @@ -1 +1 @@ -Subproject commit d345d6ac22f381c882420de9053d30ae1ff38d75 +Subproject commit 4904951339a70b4814d2d3723436b20d079cb01b From e2af1766eb1ea0ee2f6b862f53a0d3c13f53365b Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Wed, 31 Jul 2024 20:25:28 +0200 Subject: [PATCH 520/661] init --- src/Functions/DateTimeTransforms.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/Functions/DateTimeTransforms.h b/src/Functions/DateTimeTransforms.h index a7bd398cdaa..fe26c5cf353 100644 --- a/src/Functions/DateTimeTransforms.h +++ b/src/Functions/DateTimeTransforms.h @@ -381,11 +381,13 @@ struct ToStartOfWeekImpl static UInt16 execute(Int64 t, UInt8 week_mode, const DateLUTImpl & time_zone) { - return time_zone.toFirstDayNumOfWeek(time_zone.toDayNum(t), week_mode); + const auto & res = time_zone.toFirstDayNumOfWeek(time_zone.toDayNum(t), week_mode); + return res >= 0 ? res : 0; } static UInt16 execute(UInt32 t, UInt8 week_mode, const DateLUTImpl & time_zone) { - return time_zone.toFirstDayNumOfWeek(time_zone.toDayNum(t), week_mode); + const auto & res = time_zone.toFirstDayNumOfWeek(time_zone.toDayNum(t), week_mode); + return res >= 0 ? 
res : 0; } static UInt16 execute(Int32 d, UInt8 week_mode, const DateLUTImpl & time_zone) { From d0c643180f408d84a5f10a917f413248b9267202 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Wed, 31 Jul 2024 20:29:36 +0200 Subject: [PATCH 521/661] add tests --- .../03215_toStartOfWeek_with_dateTime64_fix.reference | 2 ++ .../0_stateless/03215_toStartOfWeek_with_dateTime64_fix.sql | 2 ++ 2 files changed, 4 insertions(+) create mode 100644 tests/queries/0_stateless/03215_toStartOfWeek_with_dateTime64_fix.reference create mode 100644 tests/queries/0_stateless/03215_toStartOfWeek_with_dateTime64_fix.sql diff --git a/tests/queries/0_stateless/03215_toStartOfWeek_with_dateTime64_fix.reference b/tests/queries/0_stateless/03215_toStartOfWeek_with_dateTime64_fix.reference new file mode 100644 index 00000000000..fd698107f22 --- /dev/null +++ b/tests/queries/0_stateless/03215_toStartOfWeek_with_dateTime64_fix.reference @@ -0,0 +1,2 @@ +1970-01-01 +1970-01-01 diff --git a/tests/queries/0_stateless/03215_toStartOfWeek_with_dateTime64_fix.sql b/tests/queries/0_stateless/03215_toStartOfWeek_with_dateTime64_fix.sql new file mode 100644 index 00000000000..0f00a52cb86 --- /dev/null +++ b/tests/queries/0_stateless/03215_toStartOfWeek_with_dateTime64_fix.sql @@ -0,0 +1,2 @@ +SELECT toStartOfWeek(toDateTime64('1970-02-01', 6)); +SELECT toStartOfWeek(toDateTime('1970-01-01')); From 636c3f642340de6e5ca4892481ca156cb236a4cd Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Wed, 31 Jul 2024 20:31:22 +0200 Subject: [PATCH 522/661] Update DateTimeTransforms.h --- src/Functions/DateTimeTransforms.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Functions/DateTimeTransforms.h b/src/Functions/DateTimeTransforms.h index fe26c5cf353..1970ec3bdb0 100644 --- a/src/Functions/DateTimeTransforms.h +++ b/src/Functions/DateTimeTransforms.h @@ -382,12 +382,12 @@ struct ToStartOfWeekImpl static UInt16 execute(Int64 t, UInt8 week_mode, const DateLUTImpl & time_zone) { const auto & res = time_zone.toFirstDayNumOfWeek(time_zone.toDayNum(t), week_mode); - return res >= 0 ? res : 0; + return std::max(res, 0); } static UInt16 execute(UInt32 t, UInt8 week_mode, const DateLUTImpl & time_zone) { const auto & res = time_zone.toFirstDayNumOfWeek(time_zone.toDayNum(t), week_mode); - return res >= 0 ? 
res : 0; + return std::max(res, 0); } static UInt16 execute(Int32 d, UInt8 week_mode, const DateLUTImpl & time_zone) { From 34ca3128ed0de60e03a105c43dc0924541f4a2c1 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Wed, 31 Jul 2024 20:36:13 +0200 Subject: [PATCH 523/661] Update DateTimeTransforms.h --- src/Functions/DateTimeTransforms.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Functions/DateTimeTransforms.h b/src/Functions/DateTimeTransforms.h index 1970ec3bdb0..46fb3bb9f57 100644 --- a/src/Functions/DateTimeTransforms.h +++ b/src/Functions/DateTimeTransforms.h @@ -381,12 +381,12 @@ struct ToStartOfWeekImpl static UInt16 execute(Int64 t, UInt8 week_mode, const DateLUTImpl & time_zone) { - const auto & res = time_zone.toFirstDayNumOfWeek(time_zone.toDayNum(t), week_mode); + const auto res = time_zone.toFirstDayNumOfWeek(time_zone.toDayNum(t), week_mode); return std::max(res, 0); } static UInt16 execute(UInt32 t, UInt8 week_mode, const DateLUTImpl & time_zone) { - const auto & res = time_zone.toFirstDayNumOfWeek(time_zone.toDayNum(t), week_mode); + const auto res = time_zone.toFirstDayNumOfWeek(time_zone.toDayNum(t), week_mode); return std::max(res, 0); } static UInt16 execute(Int32 d, UInt8 week_mode, const DateLUTImpl & time_zone) From 9def8cea8121ae8001649629a96e73eb1e10159b Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Wed, 31 Jul 2024 18:57:08 +0000 Subject: [PATCH 524/661] Update version_date.tsv and changelogs after v24.4.4.107-stable --- docs/changelogs/v24.4.4.107-stable.md | 70 +++++++++++++++++++++++++++ utils/list-versions/version_date.tsv | 1 + 2 files changed, 71 insertions(+) create mode 100644 docs/changelogs/v24.4.4.107-stable.md diff --git a/docs/changelogs/v24.4.4.107-stable.md b/docs/changelogs/v24.4.4.107-stable.md new file mode 100644 index 00000000000..ba7c576715e --- /dev/null +++ b/docs/changelogs/v24.4.4.107-stable.md @@ -0,0 +1,70 @@ +--- +sidebar_position: 1 +sidebar_label: 2024 +--- + +# 2024 Changelog + +### ClickHouse release v24.4.4.107-stable (af0ed6b197e) FIXME as compared to v24.4.3.25-stable (a915dd4eda4) + +#### Improvement +* Backported in [#65884](https://github.com/ClickHouse/ClickHouse/issues/65884): Always start Keeper with sufficient amount of threads in global thread pool. [#64444](https://github.com/ClickHouse/ClickHouse/pull/64444) ([Duc Canh Le](https://github.com/canhld94)). +* Backported in [#65303](https://github.com/ClickHouse/ClickHouse/issues/65303): Returned back the behaviour of how ClickHouse works and interprets Tuples in CSV format. This change effectively reverts https://github.com/ClickHouse/ClickHouse/pull/60994 and makes it available only under a few settings: `output_format_csv_serialize_tuple_into_separate_columns`, `input_format_csv_deserialize_separate_columns_into_tuple` and `input_format_csv_try_infer_strings_from_quoted_tuples`. [#65170](https://github.com/ClickHouse/ClickHouse/pull/65170) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Backported in [#65894](https://github.com/ClickHouse/ClickHouse/issues/65894): Respect cgroup CPU limit in Keeper. [#65819](https://github.com/ClickHouse/ClickHouse/pull/65819) ([Antonio Andelic](https://github.com/antonio2368)). + +#### Critical Bug Fix (crash, LOGICAL_ERROR, data loss, RBAC) +* Backported in [#65372](https://github.com/ClickHouse/ClickHouse/issues/65372): Fix a bug in ClickHouse Keeper that causes digest mismatch during closing session. 
[#65198](https://github.com/ClickHouse/ClickHouse/pull/65198) ([Aleksei Filatov](https://github.com/aalexfvk)). +* Backported in [#66883](https://github.com/ClickHouse/ClickHouse/issues/66883): Fix unexpeced size of low cardinality column in function calls. [#65298](https://github.com/ClickHouse/ClickHouse/pull/65298) ([Raúl Marín](https://github.com/Algunenano)). +* Backported in [#65435](https://github.com/ClickHouse/ClickHouse/issues/65435): Forbid `QUALIFY` clause in the old analyzer. The old analyzer ignored `QUALIFY`, so it could lead to unexpected data removal in mutations. [#65356](https://github.com/ClickHouse/ClickHouse/pull/65356) ([Dmitry Novik](https://github.com/novikd)). +* Backported in [#65448](https://github.com/ClickHouse/ClickHouse/issues/65448): Use correct memory alignment for Distinct combinator. Previously, crash could happen because of invalid memory allocation when the combinator was used. [#65379](https://github.com/ClickHouse/ClickHouse/pull/65379) ([Antonio Andelic](https://github.com/antonio2368)). +* Backported in [#65710](https://github.com/ClickHouse/ClickHouse/issues/65710): Fix crash in maxIntersections. [#65689](https://github.com/ClickHouse/ClickHouse/pull/65689) ([Raúl Marín](https://github.com/Algunenano)). +* Backported in [#66689](https://github.com/ClickHouse/ClickHouse/issues/66689): Fix the VALID UNTIL clause in the user definition resetting after a restart. Closes [#66405](https://github.com/ClickHouse/ClickHouse/issues/66405). [#66409](https://github.com/ClickHouse/ClickHouse/pull/66409) ([Nikolay Degterinsky](https://github.com/evillique)). + +#### Bug Fix (user-visible misbehavior in an official stable release) +* Backported in [#65353](https://github.com/ClickHouse/ClickHouse/issues/65353): Fix possible abort on uncaught exception in ~WriteBufferFromFileDescriptor in StatusFile. [#64206](https://github.com/ClickHouse/ClickHouse/pull/64206) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#65060](https://github.com/ClickHouse/ClickHouse/issues/65060): Fix the `Expression nodes list expected 1 projection names` and `Unknown expression or identifier` errors for queries with aliases to `GLOBAL IN.`. [#64517](https://github.com/ClickHouse/ClickHouse/pull/64517) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#65329](https://github.com/ClickHouse/ClickHouse/issues/65329): Fix the crash loop when restoring from backup is blocked by creating an MV with a definer that hasn't been restored yet. [#64595](https://github.com/ClickHouse/ClickHouse/pull/64595) ([pufit](https://github.com/pufit)). +* Backported in [#64833](https://github.com/ClickHouse/ClickHouse/issues/64833): Fix bug which could lead to non-working TTLs with expressions. [#64694](https://github.com/ClickHouse/ClickHouse/pull/64694) ([alesapin](https://github.com/alesapin)). +* Backported in [#65086](https://github.com/ClickHouse/ClickHouse/issues/65086): Fix removing the `WHERE` and `PREWHERE` expressions, which are always true (for the new analyzer). [#64695](https://github.com/ClickHouse/ClickHouse/pull/64695) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#65540](https://github.com/ClickHouse/ClickHouse/issues/65540): Fix crash for `ALTER TABLE ... ON CLUSTER ... MODIFY SQL SECURITY`. [#64957](https://github.com/ClickHouse/ClickHouse/pull/64957) ([pufit](https://github.com/pufit)). 
+* Backported in [#65578](https://github.com/ClickHouse/ClickHouse/issues/65578): Fix crash on destroying AccessControl: add explicit shutdown. [#64993](https://github.com/ClickHouse/ClickHouse/pull/64993) ([Vitaly Baranov](https://github.com/vitlibar)). +* Backported in [#65161](https://github.com/ClickHouse/ClickHouse/issues/65161): Fix pushing arithmetic operations out of aggregation. In the new analyzer, optimization was applied only once. [#65104](https://github.com/ClickHouse/ClickHouse/pull/65104) ([Dmitry Novik](https://github.com/novikd)). +* Backported in [#65616](https://github.com/ClickHouse/ClickHouse/issues/65616): Fix aggregate function name rewriting in the new analyzer. [#65110](https://github.com/ClickHouse/ClickHouse/pull/65110) ([Dmitry Novik](https://github.com/novikd)). +* Backported in [#65730](https://github.com/ClickHouse/ClickHouse/issues/65730): Eliminate injective function in argument of functions `uniq*` recursively. This used to work correctly but was broken in the new analyzer. [#65140](https://github.com/ClickHouse/ClickHouse/pull/65140) ([Duc Canh Le](https://github.com/canhld94)). +* Backported in [#65668](https://github.com/ClickHouse/ClickHouse/issues/65668): Disable `non-intersecting-parts` optimization for queries with `FINAL` in case of `read-in-order` optimization was enabled. This could lead to an incorrect query result. As a workaround, disable `do_not_merge_across_partitions_select_final` and `split_parts_ranges_into_intersecting_and_non_intersecting_final` before this fix is merged. [#65505](https://github.com/ClickHouse/ClickHouse/pull/65505) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#65786](https://github.com/ClickHouse/ClickHouse/issues/65786): Fixed bug in MergeJoin. Column in sparse serialisation might be treated as a column of its nested type though the required conversion wasn't performed. [#65632](https://github.com/ClickHouse/ClickHouse/pull/65632) ([Nikita Taranov](https://github.com/nickitat)). +* Backported in [#65810](https://github.com/ClickHouse/ClickHouse/issues/65810): Fix invalid exceptions in function `parseDateTime` with `%F` and `%D` placeholders. [#65768](https://github.com/ClickHouse/ClickHouse/pull/65768) ([Antonio Andelic](https://github.com/antonio2368)). +* Backported in [#65931](https://github.com/ClickHouse/ClickHouse/issues/65931): For queries that read from `PostgreSQL`, cancel the internal `PostgreSQL` query if the ClickHouse query is finished. Otherwise, `ClickHouse` query cannot be canceled until the internal `PostgreSQL` query is finished. [#65771](https://github.com/ClickHouse/ClickHouse/pull/65771) ([Maksim Kita](https://github.com/kitaisreal)). +* Backported in [#65826](https://github.com/ClickHouse/ClickHouse/issues/65826): Fix a bug in short circuit logic when old analyzer and dictGetOrDefault is used. [#65802](https://github.com/ClickHouse/ClickHouse/pull/65802) ([jsc0218](https://github.com/jsc0218)). +* Backported in [#66299](https://github.com/ClickHouse/ClickHouse/issues/66299): Better handling of join conditions involving `IS NULL` checks (for example `ON (a = b AND (a IS NOT NULL) AND (b IS NOT NULL) ) OR ( (a IS NULL) AND (b IS NULL) )` is rewritten to `ON a <=> b`), fix incorrect optimization when condition other then `IS NULL` are present. [#65835](https://github.com/ClickHouse/ClickHouse/pull/65835) ([vdimir](https://github.com/vdimir)). 
+* Backported in [#66326](https://github.com/ClickHouse/ClickHouse/issues/66326): Add missing settings `input_format_csv_skip_first_lines/input_format_tsv_skip_first_lines/input_format_csv_try_infer_numbers_from_strings/input_format_csv_try_infer_strings_from_quoted_tuples` in schema inference cache because they can change the resulting schema. It prevents from incorrect result of schema inference with these settings changed. [#65980](https://github.com/ClickHouse/ClickHouse/pull/65980) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#66153](https://github.com/ClickHouse/ClickHouse/issues/66153): Fixed buffer overflow bug in `unbin`/`unhex` implementation. [#66106](https://github.com/ClickHouse/ClickHouse/pull/66106) ([Nikita Taranov](https://github.com/nickitat)). +* Backported in [#66459](https://github.com/ClickHouse/ClickHouse/issues/66459): Fixed a bug in ZooKeeper client: a session could get stuck in unusable state after receiving a hardware error from ZooKeeper. For example, this might happen due to "soft memory limit" in ClickHouse Keeper. [#66140](https://github.com/ClickHouse/ClickHouse/pull/66140) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#66224](https://github.com/ClickHouse/ClickHouse/issues/66224): Fix issue in SumIfToCountIfVisitor and signed integers. [#66146](https://github.com/ClickHouse/ClickHouse/pull/66146) ([Raúl Marín](https://github.com/Algunenano)). +* Backported in [#66267](https://github.com/ClickHouse/ClickHouse/issues/66267): Don't throw `TIMEOUT_EXCEEDED` for `none_only_active` mode of `distributed_ddl_output_mode`. [#66218](https://github.com/ClickHouse/ClickHouse/pull/66218) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#66678](https://github.com/ClickHouse/ClickHouse/issues/66678): Fix handling limit for `system.numbers_mt` when no index can be used. [#66231](https://github.com/ClickHouse/ClickHouse/pull/66231) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Backported in [#66603](https://github.com/ClickHouse/ClickHouse/issues/66603): Fixed how the ClickHouse server detects the maximum number of usable CPU cores as specified by cgroups v2 if the server runs in a container such as Docker. In more detail, containers often run their process in the root cgroup which has an empty name. In that case, ClickHouse ignored the CPU limits set by cgroups v2. [#66237](https://github.com/ClickHouse/ClickHouse/pull/66237) ([filimonov](https://github.com/filimonov)). +* Backported in [#66358](https://github.com/ClickHouse/ClickHouse/issues/66358): Fix the `Not-ready set` error when a subquery with `IN` is used in the constraint. [#66261](https://github.com/ClickHouse/ClickHouse/pull/66261) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#66971](https://github.com/ClickHouse/ClickHouse/issues/66971): Fix `Column identifier is already registered` error with `group_by_use_nulls=true` and new analyzer. [#66400](https://github.com/ClickHouse/ClickHouse/pull/66400) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#66968](https://github.com/ClickHouse/ClickHouse/issues/66968): Fix `Cannot find column` error for queries with constant expression in `GROUP BY` key and new analyzer enabled. [#66433](https://github.com/ClickHouse/ClickHouse/pull/66433) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#66719](https://github.com/ClickHouse/ClickHouse/issues/66719): Correctly track memory for `Allocator::realloc`. 
[#66548](https://github.com/ClickHouse/ClickHouse/pull/66548) ([Antonio Andelic](https://github.com/antonio2368)). +* Backported in [#66950](https://github.com/ClickHouse/ClickHouse/issues/66950): Fix an invalid result for queries with `WINDOW`. This could happen when `PARTITION` columns have sparse serialization and window functions are executed in parallel. [#66579](https://github.com/ClickHouse/ClickHouse/pull/66579) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#66947](https://github.com/ClickHouse/ClickHouse/issues/66947): Fix the `Method getResultType is not supported for QUERY query node` error when a scalar subquery was used as the first argument of IN (with the new analyzer). [#66655](https://github.com/ClickHouse/ClickHouse/pull/66655) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#67195](https://github.com/ClickHouse/ClickHouse/issues/67195): TRUNCATE DATABASE used to stop replication as if it were a DROP DATABASE query; this is now fixed. [#67129](https://github.com/ClickHouse/ClickHouse/pull/67129) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#67377](https://github.com/ClickHouse/ClickHouse/issues/67377): Fix the error `Cannot convert column because it is non constant in source stream but must be constant in result.` for a query that reads from the `Merge` table over the `Distributed` table with one shard. [#67146](https://github.com/ClickHouse/ClickHouse/pull/67146) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#67240](https://github.com/ClickHouse/ClickHouse/issues/67240): This closes [#67156](https://github.com/ClickHouse/ClickHouse/issues/67156). This closes [#66447](https://github.com/ClickHouse/ClickHouse/issues/66447). The bug was introduced in https://github.com/ClickHouse/ClickHouse/pull/62907. [#67178](https://github.com/ClickHouse/ClickHouse/pull/67178) ([Maksim Kita](https://github.com/kitaisreal)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Backported in [#65410](https://github.com/ClickHouse/ClickHouse/issues/65410): Re-enable OpenSSL session caching. [#65111](https://github.com/ClickHouse/ClickHouse/pull/65111) ([Robert Schulze](https://github.com/rschu1ze)). +* Backported in [#65903](https://github.com/ClickHouse/ClickHouse/issues/65903): Fix a bug with session closing in Keeper. [#65735](https://github.com/ClickHouse/ClickHouse/pull/65735) ([Antonio Andelic](https://github.com/antonio2368)). +* Backported in [#66385](https://github.com/ClickHouse/ClickHouse/issues/66385): Disable broken cases from 02911_join_on_nullsafe_optimization. [#66310](https://github.com/ClickHouse/ClickHouse/pull/66310) ([vdimir](https://github.com/vdimir)). +* Backported in [#66424](https://github.com/ClickHouse/ClickHouse/issues/66424): Ignore subquery for IN in DDLLoadingDependencyVisitor. [#66395](https://github.com/ClickHouse/ClickHouse/pull/66395) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#66542](https://github.com/ClickHouse/ClickHouse/issues/66542): Add additional log masking in CI. [#66523](https://github.com/ClickHouse/ClickHouse/pull/66523) ([Raúl Marín](https://github.com/Algunenano)). +* Backported in [#66857](https://github.com/ClickHouse/ClickHouse/issues/66857): Fix a data race in S3::ClientCache. [#66644](https://github.com/ClickHouse/ClickHouse/pull/66644) ([Konstantin Morozov](https://github.com/k-morozov)). +* Backported in [#66873](https://github.com/ClickHouse/ClickHouse/issues/66873): Support one more case in JOIN ON ... IS NULL.
[#66725](https://github.com/ClickHouse/ClickHouse/pull/66725) ([vdimir](https://github.com/vdimir)). +* Backported in [#67057](https://github.com/ClickHouse/ClickHouse/issues/67057): Increase asio pool size in case the server is tiny. [#66761](https://github.com/ClickHouse/ClickHouse/pull/66761) ([alesapin](https://github.com/alesapin)). +* Backported in [#66944](https://github.com/ClickHouse/ClickHouse/issues/66944): Small fix in realloc memory tracking. [#66820](https://github.com/ClickHouse/ClickHouse/pull/66820) ([Antonio Andelic](https://github.com/antonio2368)). +* Backported in [#67250](https://github.com/ClickHouse/ClickHouse/issues/67250): Followup [#66725](https://github.com/ClickHouse/ClickHouse/issues/66725). [#66869](https://github.com/ClickHouse/ClickHouse/pull/66869) ([vdimir](https://github.com/vdimir)). +* Backported in [#67410](https://github.com/ClickHouse/ClickHouse/issues/67410): CI: Fix build results for release branches. [#67402](https://github.com/ClickHouse/ClickHouse/pull/67402) ([Max K.](https://github.com/maxknv)). + diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv index 027b207d3ad..abd8f84ec74 100644 --- a/utils/list-versions/version_date.tsv +++ b/utils/list-versions/version_date.tsv @@ -5,6 +5,7 @@ v24.5.4.49-stable 2024-07-01 v24.5.3.5-stable 2024-06-13 v24.5.2.34-stable 2024-06-13 v24.5.1.1763-stable 2024-06-01 +v24.4.4.107-stable 2024-07-31 v24.4.3.25-stable 2024-06-14 v24.4.2.141-stable 2024-06-07 v24.4.1.2088-stable 2024-05-01 From cc27c254abd4b6fd8f64b47e0bdf6195041bd5ef Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 31 Jul 2024 17:38:20 +0200 Subject: [PATCH 525/661] Minor change --- src/Databases/DatabaseOnDisk.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Databases/DatabaseOnDisk.cpp b/src/Databases/DatabaseOnDisk.cpp index f419f5811a1..734f354d9a5 100644 --- a/src/Databases/DatabaseOnDisk.cpp +++ b/src/Databases/DatabaseOnDisk.cpp @@ -313,7 +313,7 @@ void DatabaseOnDisk::detachTablePermanently(ContextPtr query_context, const Stri std::lock_guard lock(mutex); if (const auto it = snapshot_detached_tables.find(table_name); it == snapshot_detached_tables.end()) { - throw Exception(ErrorCodes::LOGICAL_ERROR, "Snapshot doesn't contain info about detached table={}", table_name); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Snapshot doesn't contain info about detached table `{}`", table_name); } else { From 867784d55c989943d0c79eb9179b01e878fabcbe Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Wed, 31 Jul 2024 22:48:16 +0200 Subject: [PATCH 526/661] Update DateTimeTransforms.h --- src/Functions/DateTimeTransforms.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Functions/DateTimeTransforms.h b/src/Functions/DateTimeTransforms.h index 46fb3bb9f57..ce7da406e9a 100644 --- a/src/Functions/DateTimeTransforms.h +++ b/src/Functions/DateTimeTransforms.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include #include From 9cb52bd1381ad3e0929062801df7c4b542cf1117 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Wed, 31 Jul 2024 23:11:35 +0200 Subject: [PATCH 527/661] fix build --- src/Functions/DateTimeTransforms.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/Functions/DateTimeTransforms.h b/src/Functions/DateTimeTransforms.h index ce7da406e9a..15f1b9580f3 100644 --- a/src/Functions/DateTimeTransforms.h +++ 
b/src/Functions/DateTimeTransforms.h @@ -1,6 +1,5 @@ #pragma once -#include #include #include #include @@ -25,7 +24,7 @@ namespace DB static constexpr auto millisecond_multiplier = 1'000; static constexpr auto microsecond_multiplier = 1'000'000; -static constexpr auto nanosecond_multiplier = 1'000'000'000; +static constexpr auto nanosecond_multiplier = 1'000'000'000; static constexpr FormatSettings::DateTimeOverflowBehavior default_date_time_overflow_behavior = FormatSettings::DateTimeOverflowBehavior::Ignore; @@ -382,12 +381,12 @@ struct ToStartOfWeekImpl static UInt16 execute(Int64 t, UInt8 week_mode, const DateLUTImpl & time_zone) { - const auto res = time_zone.toFirstDayNumOfWeek(time_zone.toDayNum(t), week_mode); + const int res = time_zone.toFirstDayNumOfWeek(time_zone.toDayNum(t), week_mode); return std::max(res, 0); } static UInt16 execute(UInt32 t, UInt8 week_mode, const DateLUTImpl & time_zone) { - const auto res = time_zone.toFirstDayNumOfWeek(time_zone.toDayNum(t), week_mode); + const int res = time_zone.toFirstDayNumOfWeek(time_zone.toDayNum(t), week_mode); return std::max(res, 0); } static UInt16 execute(Int32 d, UInt8 week_mode, const DateLUTImpl & time_zone) From 6403f3f545bee153ffaf4ce5bda6fcde33ef88d2 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 31 Jul 2024 23:29:54 +0200 Subject: [PATCH 528/661] Miscellaneous --- src/Common/Epoll.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Common/Epoll.cpp b/src/Common/Epoll.cpp index 49c86222cf0..ef7c6e143a0 100644 --- a/src/Common/Epoll.cpp +++ b/src/Common/Epoll.cpp @@ -19,7 +19,7 @@ Epoll::Epoll() : events_count(0) { epoll_fd = epoll_create1(0); if (epoll_fd == -1) - throw DB::ErrnoException(DB::ErrorCodes::EPOLL_ERROR, "Cannot open epoll descriptor"); + throw ErrnoException(ErrorCodes::EPOLL_ERROR, "Cannot open epoll descriptor"); } Epoll::Epoll(Epoll && other) noexcept : epoll_fd(other.epoll_fd), events_count(other.events_count.load()) @@ -47,7 +47,7 @@ void Epoll::add(int fd, void * ptr, uint32_t events) ++events_count; if (epoll_ctl(epoll_fd, EPOLL_CTL_ADD, fd, &event) == -1) - throw DB::ErrnoException(DB::ErrorCodes::EPOLL_ERROR, "Cannot add new descriptor to epoll"); + throw ErrnoException(ErrorCodes::EPOLL_ERROR, "Cannot add new descriptor to epoll"); } void Epoll::remove(int fd) @@ -55,7 +55,7 @@ void Epoll::remove(int fd) --events_count; if (epoll_ctl(epoll_fd, EPOLL_CTL_DEL, fd, nullptr) == -1) - throw DB::ErrnoException(DB::ErrorCodes::EPOLL_ERROR, "Cannot remove descriptor from epoll"); + throw ErrnoException(ErrorCodes::EPOLL_ERROR, "Cannot remove descriptor from epoll"); } size_t Epoll::getManyReady(int max_events, epoll_event * events_out, int timeout) const @@ -82,7 +82,7 @@ size_t Epoll::getManyReady(int max_events, epoll_event * events_out, int timeout continue; } else - throw DB::ErrnoException(DB::ErrorCodes::EPOLL_ERROR, "Error in epoll_wait"); + throw ErrnoException(ErrorCodes::EPOLL_ERROR, "Error in epoll_wait"); } else break; From 6e914ff6da67be1c1381ffed2d04b5758704baf3 Mon Sep 17 00:00:00 2001 From: Thom O'Connor Date: Wed, 31 Jul 2024 21:59:37 +0000 Subject: [PATCH 529/661] Update settings.md Removing duplicate header "## background_merges_mutations_scheduling_policy" --- docs/en/operations/server-configuration-parameters/settings.md | 2 -- 1 file changed, 2 deletions(-) diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md index 8278f8c8699..a1e3c292b04 100644 --- 
a/docs/en/operations/server-configuration-parameters/settings.md +++ b/docs/en/operations/server-configuration-parameters/settings.md @@ -103,8 +103,6 @@ Default: 2 The policy on how to perform a scheduling for background merges and mutations. Possible values are: `round_robin` and `shortest_task_first`. -## background_merges_mutations_scheduling_policy - Algorithm used to select next merge or mutation to be executed by background thread pool. Policy may be changed at runtime without server restart. Could be applied from the `default` profile for backward compatibility. From f162d6bd5e03c6f717b4f45cf4c7ba6491aaa5fa Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Thu, 1 Aug 2024 06:35:22 +0000 Subject: [PATCH 530/661] Update version_date.tsv and changelogs after v24.7.2.13-stable --- docker/keeper/Dockerfile | 2 +- docker/server/Dockerfile.alpine | 2 +- docker/server/Dockerfile.ubuntu | 2 +- docs/changelogs/v24.7.2.13-stable.md | 24 ++++++++++++++++++++++++ utils/list-versions/version_date.tsv | 2 +- 5 files changed, 28 insertions(+), 4 deletions(-) create mode 100644 docs/changelogs/v24.7.2.13-stable.md diff --git a/docker/keeper/Dockerfile b/docker/keeper/Dockerfile index e99c86267f9..94603763572 100644 --- a/docker/keeper/Dockerfile +++ b/docker/keeper/Dockerfile @@ -34,7 +34,7 @@ RUN arch=${TARGETARCH:-amd64} \ # lts / testing / prestable / etc ARG REPO_CHANNEL="stable" ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}" -ARG VERSION="24.7.1.2915" +ARG VERSION="24.7.2.13" ARG PACKAGES="clickhouse-keeper" ARG DIRECT_DOWNLOAD_URLS="" diff --git a/docker/server/Dockerfile.alpine b/docker/server/Dockerfile.alpine index fb562b911a3..f40118c7b06 100644 --- a/docker/server/Dockerfile.alpine +++ b/docker/server/Dockerfile.alpine @@ -32,7 +32,7 @@ RUN arch=${TARGETARCH:-amd64} \ # lts / testing / prestable / etc ARG REPO_CHANNEL="stable" ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}" -ARG VERSION="24.7.1.2915" +ARG VERSION="24.7.2.13" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" ARG DIRECT_DOWNLOAD_URLS="" diff --git a/docker/server/Dockerfile.ubuntu b/docker/server/Dockerfile.ubuntu index 51f4e6a0f40..032aa862e4a 100644 --- a/docker/server/Dockerfile.ubuntu +++ b/docker/server/Dockerfile.ubuntu @@ -28,7 +28,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list ARG REPO_CHANNEL="stable" ARG REPOSITORY="deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb ${REPO_CHANNEL} main" -ARG VERSION="24.7.1.2915" +ARG VERSION="24.7.2.13" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" #docker-official-library:off diff --git a/docs/changelogs/v24.7.2.13-stable.md b/docs/changelogs/v24.7.2.13-stable.md new file mode 100644 index 00000000000..4a2fb665116 --- /dev/null +++ b/docs/changelogs/v24.7.2.13-stable.md @@ -0,0 +1,24 @@ +--- +sidebar_position: 1 +sidebar_label: 2024 +--- + +# 2024 Changelog + +### ClickHouse release v24.7.2.13-stable (6e41f601b2f) FIXME as compared to v24.7.1.2915-stable (a37d2d43da7) + +#### Improvement +* Backported in [#67531](https://github.com/ClickHouse/ClickHouse/issues/67531): In PR https://github.com/ClickHouse/ClickHouse/pull/66025 we introduced a setting `input_format_orc_read_use_writer_time_zone` so that, when reading an ORC file, the reader uses the writer's time zone instead of always using `GMT`. [#67175](https://github.com/ClickHouse/ClickHouse/pull/67175) ([kevinyhzou](https://github.com/KevinyhZou)).
+ +#### Critical Bug Fix (crash, LOGICAL_ERROR, data loss, RBAC) +* Backported in [#67505](https://github.com/ClickHouse/ClickHouse/issues/67505): Fix crash in DistributedAsyncInsert when connection is empty. [#67219](https://github.com/ClickHouse/ClickHouse/pull/67219) ([Pablo Marcos](https://github.com/pamarcos)). + +#### Bug Fix (user-visible misbehavior in an official stable release) +* Backported in [#67580](https://github.com/ClickHouse/ClickHouse/issues/67580): Fix execution of nested short-circuit functions. [#67520](https://github.com/ClickHouse/ClickHouse/pull/67520) ([Kruglov Pavel](https://github.com/Avogar)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Backported in [#67551](https://github.com/ClickHouse/ClickHouse/issues/67551): [Green CI] Fix test test_storage_s3_queue/test.py::test_max_set_age. [#67035](https://github.com/ClickHouse/ClickHouse/pull/67035) ([Pablo Marcos](https://github.com/pamarcos)). +* Backported in [#67514](https://github.com/ClickHouse/ClickHouse/issues/67514): Split test 02967_parallel_replicas_join_algo_and_analyzer. [#67211](https://github.com/ClickHouse/ClickHouse/pull/67211) ([Nikita Taranov](https://github.com/nickitat)). +* Backported in [#67545](https://github.com/ClickHouse/ClickHouse/issues/67545): [Green CI] Fix WriteBuffer destructor when finalize has failed for MergeTreeDeduplicationLog::shutdown. [#67474](https://github.com/ClickHouse/ClickHouse/pull/67474) ([Alexey Katsman](https://github.com/alexkats)). + diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv index abd8f84ec74..b1391c2d781 100644 --- a/utils/list-versions/version_date.tsv +++ b/utils/list-versions/version_date.tsv @@ -1,3 +1,4 @@ +v24.7.2.13-stable 2024-08-01 v24.7.1.2915-stable 2024-07-30 v24.6.2.17-stable 2024-07-05 v24.6.1.4423-stable 2024-07-01 @@ -5,7 +6,6 @@ v24.5.4.49-stable 2024-07-01 v24.5.3.5-stable 2024-06-13 v24.5.2.34-stable 2024-06-13 v24.5.1.1763-stable 2024-06-01 -v24.4.4.107-stable 2024-07-31 v24.4.3.25-stable 2024-06-14 v24.4.2.141-stable 2024-06-07 v24.4.1.2088-stable 2024-05-01 From c2df527a32d640f52296ea7aefae177e22504082 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Thu, 1 Aug 2024 08:42:54 +0200 Subject: [PATCH 531/661] Reduce fault rate --- .../test_keeper_map_retries/configs/fault_injection.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/integration/test_keeper_map_retries/configs/fault_injection.xml b/tests/integration/test_keeper_map_retries/configs/fault_injection.xml index 145945c7c7c..0933b6b3031 100644 --- a/tests/integration/test_keeper_map_retries/configs/fault_injection.xml +++ b/tests/integration/test_keeper_map_retries/configs/fault_injection.xml @@ -1,6 +1,6 @@ - 0.05 - 0.05 + 0.005 + 0.005 From 5564489cca1c14e95e7c543e03c508849abaf079 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Thu, 1 Aug 2024 15:31:54 +0800 Subject: [PATCH 532/661] change as request --- .../functions/tuple-map-functions.md | 18 +++++++++++--- src/Functions/map.cpp | 24 ++++++++++--------- .../0_stateless/01651_map_functions.reference | 4 ++++ .../0_stateless/01651_map_functions.sql | 7 +++++- 4 files changed, 38 insertions(+), 15 deletions(-) diff --git a/docs/en/sql-reference/functions/tuple-map-functions.md b/docs/en/sql-reference/functions/tuple-map-functions.md index db66188b1f5..d670ed42a2a 100644 --- a/docs/en/sql-reference/functions/tuple-map-functions.md +++ b/docs/en/sql-reference/functions/tuple-map-functions.md @@ -43,7 +43,7 @@ Result: ## mapFromArrays 
-Creates a map from an array of keys and an array of values. +Creates a map from an array or map of keys and an array or map of values. The function is a convenient alternative to syntax `CAST([...], 'Map(key_type, value_type)')`. For example, instead of writing @@ -62,8 +62,8 @@ Alias: `MAP_FROM_ARRAYS(keys, values)` **Arguments** -- `keys` — Array or map of keys to create the map from. [Array(T)](../data-types/array.md) where `T` can be any type supported by [Map](../data-types/map.md) as key type, or [Map](../data-types/map.md). -- `values` - Array or map of values to create the map from. [Array](../data-types/array.md) or [Map](../data-types/map.md). +- `keys` — Array or map of keys to create the map from [Array](../data-types/array.md) or [Map](../data-types/map.md). If `keys` is an array, we accept `Array(Nullable(T))` or `Array(LowCardinality(Nullable(T)))` as its type as long as it doesn't contain NULL values. +- `values` - Array or map of values to create the map from [Array](../data-types/array.md) or [Map](../data-types/map.md). **Returned value** @@ -99,6 +99,18 @@ Result: └───────────────────────────────────────────────────────┘ ``` +```sql +SELECT mapFromArrays(map('a', 1, 'b', 2, 'c', 3), [1, 2, 3]) +``` + +Result: + +``` +┌─mapFromArrays(map('a', 1, 'b', 2, 'c', 3), [1, 2, 3])─┐ +│ {('a',1):1,('b',2):2,('c',3):3} │ +└───────────────────────────────────────────────────────┘ +``` + ## extractKeyValuePairs Converts a string of key-value pairs to a [Map(String, String)](../data-types/map.md). diff --git a/src/Functions/map.cpp b/src/Functions/map.cpp index a8e5f7ad90e..738c61164a3 100644 --- a/src/Functions/map.cpp +++ b/src/Functions/map.cpp @@ -23,6 +23,7 @@ namespace ErrorCodes extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int SIZES_OF_ARRAYS_DONT_MATCH; extern const int ILLEGAL_COLUMN; + extern const int BAD_ARGUMENTS; } namespace @@ -157,7 +158,7 @@ private: bool use_variant_as_common_type = false; }; -/// mapFromArrays(keys, values) is a function that allows you to make key-value pair from a pair of arrays +/// mapFromArrays(keys, values) is a function that allows you to make key-value pairs from a pair of arrays or maps class FunctionMapFromArrays : public IFunction { public: @@ -181,13 +182,13 @@ public: getName(), arguments.size()); - auto get_nested_type = [this](const DataTypePtr & type) -> DataTypePtr + auto get_nested_type = [&](const DataTypePtr & type) { DataTypePtr nested; - if (const auto * array_type = checkAndGetDataType(type.get())) - nested = array_type->getNestedType(); - else if (const auto * map_type = checkAndGetDataType(type.get())) - nested = std::make_shared(map_type->getKeyValueTypes()); + if (const auto * type_as_array = checkAndGetDataType(type.get())) + nested = type_as_array->getNestedType(); + else if (const auto * type_as_map = checkAndGetDataType(type.get())) + nested = std::make_shared(type_as_map->getKeyValueTypes()); else throw Exception( ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, @@ -201,8 +202,9 @@ public: auto key_type = get_nested_type(arguments[0]); auto value_type = get_nested_type(arguments[1]); - /// Remove Nullable from key_type if needed for map key must not be Nullable + /// We accept Array(Nullable(T)) or Array(LowCardinality(Nullable(T))) as key types as long as the actual array doesn't contain NULL values (this is checked in executeImpl).
key_type = removeNullableOrLowCardinalityNullable(key_type); + DataTypes key_value_types{key_type, value_type}; return std::make_shared(key_value_types); } @@ -210,7 +212,7 @@ public: ColumnPtr executeImpl( const ColumnsWithTypeAndName & arguments, const DataTypePtr & /* result_type */, size_t /* input_rows_count */) const override { - auto get_array_column = [this](const ColumnPtr & column) -> std::pair + auto get_array_column = [&](const ColumnPtr & column) -> std::pair { bool is_const = isColumnConst(*column); ColumnPtr holder = is_const ? column->convertToFullColumnIfConst() : column; @@ -231,8 +233,9 @@ public: }; auto [col_keys, key_holder] = get_array_column(arguments[0].column); + auto [col_values, values_holder] = get_array_column(arguments[1].column); - /// Check if nested column of first argument contains NULL value in case its nested type is Nullable(T) type. + /// Nullable(T) or LowCardinality(Nullable(T)) are okay as nested key types but actual NULL values are not okay. ColumnPtr data_keys = col_keys->getDataPtr(); if (isColumnNullableOrLowCardinalityNullable(*data_keys)) { @@ -253,10 +256,9 @@ public: if (null_map && !memoryIsZero(null_map->data(), 0, null_map->size())) throw Exception( - ErrorCodes::ILLEGAL_COLUMN, "The nested column of first argument in function {} must not contain NULLs", getName()); + ErrorCodes::BAD_ARGUMENTS, "The nested column of first argument in function {} must not contain NULLs", getName()); } - auto [col_values, values_holder] = get_array_column(arguments[1].column); if (!col_keys->hasEqualOffsets(*col_values)) throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH, "Two arguments of function {} must have equal sizes", getName()); diff --git a/tests/queries/0_stateless/01651_map_functions.reference b/tests/queries/0_stateless/01651_map_functions.reference index 9114aa419b1..e336a02318d 100644 --- a/tests/queries/0_stateless/01651_map_functions.reference +++ b/tests/queries/0_stateless/01651_map_functions.reference @@ -55,3 +55,7 @@ {1:3,2:4} {1:3,2:4} {1:3,2:4} {(1,3):'a',(2,4):'b'} +{(1,'a'):'c',(2,'b'):'d'} +{(1,'a'):'c',(2,'b'):'d'} +{(1,'a'):'c',(2,'b'):'d'} +{(1,'a'):'c',(2,'b'):'d'} diff --git a/tests/queries/0_stateless/01651_map_functions.sql b/tests/queries/0_stateless/01651_map_functions.sql index 4604ddd6db1..dc93a38b265 100644 --- a/tests/queries/0_stateless/01651_map_functions.sql +++ b/tests/queries/0_stateless/01651_map_functions.sql @@ -68,7 +68,7 @@ select mapFromArrays([[1,2], [3,4]], [4, 5, 6]); -- { serverError SIZES_OF_ARRAY select mapFromArrays(['a', 2], [4, 5]); -- { serverError NO_COMMON_TYPE} select mapFromArrays([1, 2], [4, 'a']); -- { serverError NO_COMMON_TYPE} select mapFromArrays(['aa', 'bb'], map('a', 4)); -- { serverError SIZES_OF_ARRAYS_DONT_MATCH } -select mapFromArrays([1,null]::Array(Nullable(UInt8)), [3,4]); -- { serverError ILLEGAL_COLUMN } +select mapFromArrays([1,null]::Array(Nullable(UInt8)), [3,4]); -- { serverError BAD_ARGUMENTS } select mapFromArrays(['aa', 'bb'], map('a', 4, 'b', 5)); select mapFromArrays(['aa', 'bb'], materialize(map('a', 4, 'b', 5))) from numbers(2); @@ -79,3 +79,8 @@ select mapFromArrays([toLowCardinality(1), toLowCardinality(2)], materialize([4, select mapFromArrays([1,2], [3,4]); select mapFromArrays([1,2]::Array(Nullable(UInt8)), [3,4]); select mapFromArrays([1,2], [3,4]) as x, mapFromArrays(x, ['a', 'b']); + +select mapFromArrays(map(1, 'a', 2, 'b'), array('c', 'd')); +select mapFromArrays(materialize(map(1, 'a', 2, 'b')), array('c', 'd')); +select mapFromArrays(map(1, 'a', 2, 
'b'), materialize(array('c', 'd'))); +select mapFromArrays(materialize(map(1, 'a', 2, 'b')), materialize(array('c', 'd'))); From 681441e170202bc3963fb3fa1d7b7785192dbd2e Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Thu, 1 Aug 2024 16:01:39 +0800 Subject: [PATCH 533/661] fix style --- src/Functions/map.cpp | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/src/Functions/map.cpp b/src/Functions/map.cpp index 738c61164a3..6e389f39dec 100644 --- a/src/Functions/map.cpp +++ b/src/Functions/map.cpp @@ -192,7 +192,7 @@ public: else throw Exception( ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Argument types of function {} must be Array or Map, but {} is given", + "Arguments of function {} must be Array or Map, but {} is given", getName(), type->getName()); @@ -275,10 +275,7 @@ public: static constexpr auto name = "mapUpdate"; static FunctionPtr create(ContextPtr) { return std::make_shared(); } - String getName() const override - { - return name; - } + String getName() const override { return name; } size_t getNumberOfArguments() const override { return 2; } @@ -287,9 +284,11 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { if (arguments.size() != 2) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + throw Exception( + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Number of arguments for function {} doesn't match: passed {}, should be 2", - getName(), arguments.size()); + getName(), + arguments.size()); const auto * left = checkAndGetDataType(arguments[0].type.get()); const auto * right = checkAndGetDataType(arguments[1].type.get()); @@ -405,7 +404,6 @@ public: return ColumnMap::create(nested_column); } }; - } REGISTER_FUNCTION(Map) From 7db4065898633ace1f909711d4caeda8d135cace Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Thu, 1 Aug 2024 10:30:50 +0200 Subject: [PATCH 534/661] Add retries to create --- src/Storages/StorageKeeperMap.cpp | 373 +++++++++++------- src/Storages/StorageKeeperMap.h | 6 +- .../configs/keeper_retries.xml | 14 + tests/integration/test_keeper_map/test.py | 3 +- .../configs/fault_injection.xml | 1 + .../configs/keeper_retries.xml | 14 + .../test_keeper_map_retries/test.py | 13 +- 7 files changed, 275 insertions(+), 149 deletions(-) create mode 100644 tests/integration/test_keeper_map/configs/keeper_retries.xml create mode 100644 tests/integration/test_keeper_map_retries/configs/keeper_retries.xml diff --git a/src/Storages/StorageKeeperMap.cpp b/src/Storages/StorageKeeperMap.cpp index 0634c7be6ee..a6be9f8da04 100644 --- a/src/Storages/StorageKeeperMap.cpp +++ b/src/Storages/StorageKeeperMap.cpp @@ -408,104 +408,192 @@ StorageKeeperMap::StorageKeeperMap( if (attach) { - checkTable(); + checkTable(context_); return; } - auto client = getClient(); + const auto & settings = context_->getSettingsRef(); + ZooKeeperRetriesControl zk_retry{ + getName(), + getLogger(getName()), + ZooKeeperRetriesInfo{settings.keeper_max_retries, settings.keeper_retry_initial_backoff_ms, settings.keeper_retry_max_backoff_ms}, + context_->getProcessListElement()}; - if (zk_root_path != "/" && !client->exists(zk_root_path)) - { - LOG_TRACE(log, "Creating root path {}", zk_root_path); - client->createAncestors(zk_root_path); - client->createIfNotExists(zk_root_path, ""); - } + zk_retry.retryLoop( + [&] + { + auto client = getClient(); + if (zk_root_path != "/" && !client->exists(zk_root_path)) + { + LOG_TRACE(log, "Creating root path {}", zk_root_path); + 
client->createAncestors(zk_root_path); + client->createIfNotExists(zk_root_path, ""); + } + }); + + std::shared_ptr metadata_drop_lock; + int32_t drop_lock_version = -1; for (size_t i = 0; i < 1000; ++i) { - std::string stored_metadata_string; - auto exists = client->tryGet(zk_metadata_path, stored_metadata_string); - - if (exists) - { - // this requires same name for columns - // maybe we can do a smarter comparison for columns and primary key expression - if (stored_metadata_string != metadata_string) - throw Exception( - ErrorCodes::BAD_ARGUMENTS, - "Path {} is already used but the stored table definition doesn't match. Stored metadata: {}", - zk_root_path, - stored_metadata_string); - - auto code = client->tryCreate(zk_table_path, "", zkutil::CreateMode::Persistent); - - /// A table on the same Keeper path already exists, we just appended our table id to subscribe as a new replica - /// We still don't know if the table matches the expected metadata so table_is_valid is not changed - /// It will be checked lazily on the first operation - if (code == Coordination::Error::ZOK) - return; - - if (code != Coordination::Error::ZNONODE) - throw zkutil::KeeperException(code, "Failed to create table on path {} because a table with same UUID already exists", zk_root_path); - - /// ZNONODE means we dropped zk_tables_path but didn't finish drop completely - } - - if (client->exists(zk_dropped_path)) - { - LOG_INFO(log, "Removing leftover nodes"); - auto code = client->tryCreate(zk_dropped_lock_path, "", zkutil::CreateMode::Ephemeral); - - if (code == Coordination::Error::ZNONODE) + bool success = false; + zk_retry.retryLoop( + [&] { - LOG_INFO(log, "Someone else removed leftover nodes"); - } - else if (code == Coordination::Error::ZNODEEXISTS) - { - LOG_INFO(log, "Someone else is removing leftover nodes"); - continue; - } - else if (code != Coordination::Error::ZOK) - { - throw Coordination::Exception::fromPath(code, zk_dropped_lock_path); - } - else - { - auto metadata_drop_lock = zkutil::EphemeralNodeHolder::existing(zk_dropped_lock_path, *client); - if (!dropTable(client, metadata_drop_lock)) - continue; - } - } + auto client = getClient(); + std::string stored_metadata_string; + auto exists = client->tryGet(zk_metadata_path, stored_metadata_string); - Coordination::Requests create_requests - { - zkutil::makeCreateRequest(zk_metadata_path, metadata_string, zkutil::CreateMode::Persistent), - zkutil::makeCreateRequest(zk_data_path, metadata_string, zkutil::CreateMode::Persistent), - zkutil::makeCreateRequest(zk_tables_path, "", zkutil::CreateMode::Persistent), - zkutil::makeCreateRequest(zk_table_path, "", zkutil::CreateMode::Persistent), - }; + if (exists) + { + // this requires same name for columns + // maybe we can do a smarter comparison for columns and primary key expression + if (stored_metadata_string != metadata_string) + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Path {} is already used but the stored table definition doesn't match. 
Stored metadata: {}", + zk_root_path, + stored_metadata_string); - Coordination::Responses create_responses; - auto code = client->tryMulti(create_requests, create_responses); - if (code == Coordination::Error::ZNODEEXISTS) - { - LOG_INFO(log, "It looks like a table on path {} was created by another server at the same moment, will retry", zk_root_path); - continue; - } - else if (code != Coordination::Error::ZOK) - { - zkutil::KeeperMultiException::check(code, create_requests, create_responses); - } + auto code = client->tryCreate(zk_table_path, "", zkutil::CreateMode::Persistent); + /// A table on the same Keeper path already exists, we just appended our table id to subscribe as a new replica + /// We still don't know if the table matches the expected metadata so table_is_valid is not changed + /// It will be checked lazily on the first operation + if (code == Coordination::Error::ZOK) + { + success = true; + return; + } - table_status = TableStatus::VALID; - /// we are the first table created for the specified Keeper path, i.e. we are the first replica - return; + /// We most likely created the path but got a timeout or disconnect + if (code == Coordination::Error::ZNODEEXISTS && zk_retry.isRetry()) + { + success = true; + return; + } + + if (code != Coordination::Error::ZNONODE) + throw zkutil::KeeperException( + code, "Failed to create table on path {} because a table with same UUID already exists", zk_root_path); + + /// ZNONODE means we dropped zk_tables_path but didn't finish drop completely + } + + if (client->exists(zk_dropped_path)) + { + LOG_INFO(log, "Removing leftover nodes"); + + bool drop_finished = false; + if (zk_retry.isRetry() && metadata_drop_lock != nullptr && drop_lock_version != -1) + { + /// if we have leftover lock from previous try, we need to recreate the ephemeral with our session + Coordination::Requests drop_lock_requests{ + zkutil::makeRemoveRequest(zk_dropped_lock_path, drop_lock_version), + zkutil::makeCreateRequest(zk_dropped_lock_path, "", zkutil::CreateMode::Ephemeral), + }; + + Coordination::Responses drop_lock_responses; + auto lock_code = client->tryMulti(drop_lock_requests, drop_lock_responses); + if (lock_code == Coordination::Error::ZBADVERSION) + { + LOG_INFO(log, "Someone else is removing leftover nodes"); + metadata_drop_lock->setAlreadyRemoved(); + metadata_drop_lock.reset(); + return; + } + + if (drop_lock_responses[0]->error == Coordination::Error::ZNONODE) + { + /// someone else removed metadata nodes or the previous ephemeral node expired + /// we will try creating dropped lock again to make sure + metadata_drop_lock->setAlreadyRemoved(); + metadata_drop_lock.reset(); + } + else if (lock_code == Coordination::Error::ZOK) + { + metadata_drop_lock->setAlreadyRemoved(); + metadata_drop_lock = zkutil::EphemeralNodeHolder::existing(zk_dropped_lock_path, *client); + drop_lock_version = -1; + Coordination::Stat lock_stat; + client->get(zk_dropped_lock_path, &lock_stat); + drop_lock_version = lock_stat.version; + if (!dropTable(client, metadata_drop_lock)) + { + metadata_drop_lock.reset(); + return; + } + drop_finished = true; + } + } + + if (!drop_finished) + { + auto code = client->tryCreate(zk_dropped_lock_path, "", zkutil::CreateMode::Ephemeral); + + if (code == Coordination::Error::ZNONODE) + { + LOG_INFO(log, "Someone else removed leftover nodes"); + } + else if (code == Coordination::Error::ZNODEEXISTS) + { + LOG_INFO(log, "Someone else is removing leftover nodes"); + return; + } + else if (code != Coordination::Error::ZOK) + { + throw 
Coordination::Exception::fromPath(code, zk_dropped_lock_path); + } + else + { + metadata_drop_lock = zkutil::EphemeralNodeHolder::existing(zk_dropped_lock_path, *client); + drop_lock_version = -1; + Coordination::Stat lock_stat; + client->get(zk_dropped_lock_path, &lock_stat); + drop_lock_version = lock_stat.version; + if (!dropTable(client, metadata_drop_lock)) + { + metadata_drop_lock.reset(); + return; + } + } + } + } + + Coordination::Requests create_requests{ + zkutil::makeCreateRequest(zk_metadata_path, metadata_string, zkutil::CreateMode::Persistent), + zkutil::makeCreateRequest(zk_data_path, metadata_string, zkutil::CreateMode::Persistent), + zkutil::makeCreateRequest(zk_tables_path, "", zkutil::CreateMode::Persistent), + zkutil::makeCreateRequest(zk_table_path, "", zkutil::CreateMode::Persistent), + }; + + Coordination::Responses create_responses; + auto code = client->tryMulti(create_requests, create_responses); + if (code == Coordination::Error::ZNODEEXISTS) + { + LOG_INFO( + log, "It looks like a table on path {} was created by another server at the same moment, will retry", zk_root_path); + return; + } + else if (code != Coordination::Error::ZOK) + { + zkutil::KeeperMultiException::check(code, create_requests, create_responses); + } + + table_status = TableStatus::VALID; + /// we are the first table created for the specified Keeper path, i.e. we are the first replica + success = true; + }); + + if (success) + return; } - throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Cannot create metadata for table, because it is removed concurrently or because " - "of wrong zk_root_path ({})", zk_root_path); + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Cannot create metadata for table, because it is removed concurrently or because " + "of wrong zk_root_path ({})", + zk_root_path); } @@ -518,7 +606,7 @@ Pipe StorageKeeperMap::read( size_t max_block_size, size_t num_streams) { - checkTable(); + checkTable(context_); storage_snapshot->check(column_names); FieldVectorPtr filtered_keys; @@ -592,13 +680,13 @@ Pipe StorageKeeperMap::read( SinkToStoragePtr StorageKeeperMap::write(const ASTPtr & /*query*/, const StorageMetadataPtr & metadata_snapshot, ContextPtr local_context, bool /*async_insert*/) { - checkTable(); + checkTable(local_context); return std::make_shared(*this, metadata_snapshot->getSampleBlock(), local_context); } void StorageKeeperMap::truncate(const ASTPtr &, const StorageMetadataPtr &, ContextPtr local_context, TableExclusiveLockHolder &) { - checkTable(); + checkTable(local_context); const auto & settings = local_context->getSettingsRef(); ZooKeeperRetriesControl zk_retry{ getName(), @@ -657,7 +745,7 @@ bool StorageKeeperMap::dropTable(zkutil::ZooKeeperPtr zookeeper, const zkutil::E void StorageKeeperMap::drop() { - auto current_table_status = getTableStatus(); + auto current_table_status = getTableStatus(getContext()); if (current_table_status == TableStatus::UNKNOWN) { static constexpr auto error_msg = "Failed to activate table because of connection issues. 
It will be activated " @@ -666,7 +754,7 @@ void StorageKeeperMap::drop() } /// if only column metadata is wrong we can still drop the table correctly - if (current_table_status == TableStatus::INVALID_KEEPER_STRUCTURE) + if (current_table_status == TableStatus::INVALID_METADATA) return; auto client = getClient(); @@ -1029,7 +1117,7 @@ UInt64 StorageKeeperMap::keysLimit() const return keys_limit; } -StorageKeeperMap::TableStatus StorageKeeperMap::getTableStatus() const +StorageKeeperMap::TableStatus StorageKeeperMap::getTableStatus(const ContextPtr & local_context) const { std::lock_guard lock{init_mutex}; if (table_status != TableStatus::UNKNOWN) @@ -1039,57 +1127,70 @@ StorageKeeperMap::TableStatus StorageKeeperMap::getTableStatus() const { try { - auto client = getClient(); + const auto & settings = local_context->getSettingsRef(); + ZooKeeperRetriesControl zk_retry{ + getName(), + getLogger(getName()), + ZooKeeperRetriesInfo{ + settings.keeper_max_retries, + settings.keeper_retry_initial_backoff_ms, + settings.keeper_retry_max_backoff_ms}, + local_context->getProcessListElement()}; - Coordination::Stat metadata_stat; - auto stored_metadata_string = client->get(zk_metadata_path, &metadata_stat); - - if (metadata_stat.numChildren == 0) + zk_retry.retryLoop([&] { + auto client = getClient(); + + Coordination::Stat metadata_stat; + auto stored_metadata_string = client->get(zk_metadata_path, &metadata_stat); + + if (metadata_stat.numChildren == 0) + { + table_status = TableStatus::INVALID_KEEPER_STRUCTURE; + return; + } + + if (metadata_string != stored_metadata_string) + { + LOG_ERROR( + log, + "Table definition does not match to the one stored in the path {}. Stored definition: {}", + zk_root_path, + stored_metadata_string); + table_status = TableStatus::INVALID_METADATA; + return; + } + + // validate all metadata and data nodes are present + Coordination::Requests requests; + requests.push_back(zkutil::makeCheckRequest(zk_table_path, -1)); + requests.push_back(zkutil::makeCheckRequest(zk_data_path, -1)); + requests.push_back(zkutil::makeCheckRequest(zk_dropped_path, -1)); + + Coordination::Responses responses; + client->tryMulti(requests, responses); + table_status = TableStatus::INVALID_KEEPER_STRUCTURE; - return; - } + if (responses[0]->error != Coordination::Error::ZOK) + { + LOG_ERROR(log, "Table node ({}) is missing", zk_table_path); + return; + } - if (metadata_string != stored_metadata_string) - { - LOG_ERROR( - log, - "Table definition does not match to the one stored in the path {}. 
Stored definition: {}", - zk_root_path, - stored_metadata_string); - table_status = TableStatus::INVALID_METADATA; - return; - } + if (responses[1]->error != Coordination::Error::ZOK) + { + LOG_ERROR(log, "Data node ({}) is missing", zk_data_path); + return; + } - // validate all metadata and data nodes are present - Coordination::Requests requests; - requests.push_back(zkutil::makeCheckRequest(zk_table_path, -1)); - requests.push_back(zkutil::makeCheckRequest(zk_data_path, -1)); - requests.push_back(zkutil::makeCheckRequest(zk_dropped_path, -1)); + if (responses[2]->error == Coordination::Error::ZOK) + { + LOG_ERROR(log, "Tables with root node {} are being dropped", zk_root_path); + return; + } - Coordination::Responses responses; - client->tryMulti(requests, responses); - - table_status = TableStatus::INVALID_KEEPER_STRUCTURE; - if (responses[0]->error != Coordination::Error::ZOK) - { - LOG_ERROR(log, "Table node ({}) is missing", zk_table_path); - return; - } - - if (responses[1]->error != Coordination::Error::ZOK) - { - LOG_ERROR(log, "Data node ({}) is missing", zk_data_path); - return; - } - - if (responses[2]->error == Coordination::Error::ZOK) - { - LOG_ERROR(log, "Tables with root node {} are being dropped", zk_root_path); - return; - } - - table_status = TableStatus::VALID; + table_status = TableStatus::VALID; + }); } catch (const Coordination::Exception & e) { @@ -1227,7 +1328,7 @@ void StorageKeeperMap::checkMutationIsPossible(const MutationCommands & commands void StorageKeeperMap::mutate(const MutationCommands & commands, ContextPtr local_context) { - checkTable(); + checkTable(local_context); if (commands.empty()) return; diff --git a/src/Storages/StorageKeeperMap.h b/src/Storages/StorageKeeperMap.h index 8ed348a4f6f..1464eeaabad 100644 --- a/src/Storages/StorageKeeperMap.h +++ b/src/Storages/StorageKeeperMap.h @@ -78,9 +78,9 @@ public: UInt64 keysLimit() const; template - void checkTable() const + void checkTable(const ContextPtr & local_context) const { - auto current_table_status = getTableStatus(); + auto current_table_status = getTableStatus(local_context); if (table_status == TableStatus::UNKNOWN) { static constexpr auto error_msg = "Failed to activate table because of connection issues. 
It will be activated " @@ -119,7 +119,7 @@ private: VALID }; - TableStatus getTableStatus() const; + TableStatus getTableStatus(const ContextPtr & context) const; void restoreDataImpl( const BackupPtr & backup, diff --git a/tests/integration/test_keeper_map/configs/keeper_retries.xml b/tests/integration/test_keeper_map/configs/keeper_retries.xml new file mode 100644 index 00000000000..43e5b9a09e8 --- /dev/null +++ b/tests/integration/test_keeper_map/configs/keeper_retries.xml @@ -0,0 +1,14 @@ + + + + 0 + 0 + + + + + + default + + + diff --git a/tests/integration/test_keeper_map/test.py b/tests/integration/test_keeper_map/test.py index 4b1bcd11cfe..861a7c47687 100644 --- a/tests/integration/test_keeper_map/test.py +++ b/tests/integration/test_keeper_map/test.py @@ -10,6 +10,7 @@ cluster = ClickHouseCluster(__file__) node = cluster.add_instance( "node", main_configs=["configs/enable_keeper_map.xml"], + user_configs=["configs/keeper_retries.xml"], with_zookeeper=True, stay_alive=True, ) @@ -49,7 +50,6 @@ def assert_keeper_exception_after_partition(query): error = node.query_and_get_error_with_retry( query, sleep_time=1, - settings={"insert_keeper_max_retries": 1, "keeper_max_retries": 1}, ) assert "Coordination::Exception" in error except: @@ -91,7 +91,6 @@ def test_keeper_map_without_zk(started_cluster): error = node.query_and_get_error_with_retry( "SELECT * FROM test_keeper_map_without_zk", sleep_time=1, - settings={"keeper_max_retries": 1}, ) assert "Failed to activate table because of connection issues" in error except: diff --git a/tests/integration/test_keeper_map_retries/configs/fault_injection.xml b/tests/integration/test_keeper_map_retries/configs/fault_injection.xml index 0933b6b3031..8406b7db785 100644 --- a/tests/integration/test_keeper_map_retries/configs/fault_injection.xml +++ b/tests/integration/test_keeper_map_retries/configs/fault_injection.xml @@ -1,5 +1,6 @@ + 1 0.005 0.005 diff --git a/tests/integration/test_keeper_map_retries/configs/keeper_retries.xml b/tests/integration/test_keeper_map_retries/configs/keeper_retries.xml new file mode 100644 index 00000000000..208dd6e47fa --- /dev/null +++ b/tests/integration/test_keeper_map_retries/configs/keeper_retries.xml @@ -0,0 +1,14 @@ + + + + 20 + 10000 + + + + + + default + + + diff --git a/tests/integration/test_keeper_map_retries/test.py b/tests/integration/test_keeper_map_retries/test.py index 352119147cd..c6760e5d1a2 100644 --- a/tests/integration/test_keeper_map_retries/test.py +++ b/tests/integration/test_keeper_map_retries/test.py @@ -11,6 +11,7 @@ cluster = ClickHouseCluster(__file__) node = cluster.add_instance( "node", main_configs=["configs/enable_keeper_map.xml"], + user_configs=["configs/keeper_retries.xml"], with_zookeeper=True, stay_alive=True, ) @@ -42,10 +43,6 @@ def repeat_query(query, repeat): for _ in range(repeat): node.query( query, - settings={ - "keeper_max_retries": 20, - "keeper_retry_max_backoff_ms": 10000, - }, ) @@ -53,10 +50,6 @@ def test_queries(started_cluster): start_clean_clickhouse() node.query("DROP TABLE IF EXISTS keeper_map_retries SYNC") - node.query( - "CREATE TABLE keeper_map_retries (a UInt64, b UInt64) Engine=KeeperMap('/keeper_map_retries') PRIMARY KEY a" - ) - node.stop_clickhouse() node.copy_file_to_container( os.path.join(CONFIG_DIR, "fault_injection.xml"), @@ -66,6 +59,10 @@ def test_queries(started_cluster): repeat_count = 10 + node.query( + "CREATE TABLE keeper_map_retries (a UInt64, b UInt64) Engine=KeeperMap('/keeper_map_retries') PRIMARY KEY a", + ) + repeat_query( "INSERT 
INTO keeper_map_retries SELECT number, number FROM numbers(500)", repeat_count, From ca01c1c5691e4562ae6fc71af7b1867cf39f7ad1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Thu, 1 Aug 2024 11:33:22 +0200 Subject: [PATCH 535/661] Fix bad merge --- src/Core/SettingsChangesHistory.cpp | 259 ---------------------------- 1 file changed, 259 deletions(-) diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index c3b9805700f..8f73e10c44f 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -497,265 +497,6 @@ static std::initializer_list col >= '2023-01-01' AND col <= '2023-12-31')"}, - {"extract_key_value_pairs_max_pairs_per_row", 0, 0, "Max number of pairs that can be produced by the `extractKeyValuePairs` function. Used as a safeguard against consuming too much memory."}, - {"default_view_definer", "CURRENT_USER", "CURRENT_USER", "Allows to set default `DEFINER` option while creating a view"}, - {"default_materialized_view_sql_security", "DEFINER", "DEFINER", "Allows to set a default value for SQL SECURITY option when creating a materialized view"}, - {"default_normal_view_sql_security", "INVOKER", "INVOKER", "Allows to set default `SQL SECURITY` option while creating a normal view"}, - {"mysql_map_string_to_text_in_show_columns", false, true, "Reduce the configuration effort to connect ClickHouse with BI tools."}, - {"mysql_map_fixed_string_to_text_in_show_columns", false, true, "Reduce the configuration effort to connect ClickHouse with BI tools."}, - }}, - {"24.1", {{"print_pretty_type_names", false, true, "Better user experience."}, - {"input_format_json_read_bools_as_strings", false, true, "Allow to read bools as strings in JSON formats by default"}, - {"output_format_arrow_use_signed_indexes_for_dictionary", false, true, "Use signed indexes type for Arrow dictionaries by default as it's recommended"}, - {"allow_experimental_variant_type", false, false, "Add new experimental Variant type"}, - {"use_variant_as_common_type", false, false, "Allow to use Variant in if/multiIf if there is no common type"}, - {"output_format_arrow_use_64_bit_indexes_for_dictionary", false, false, "Allow to use 64 bit indexes type in Arrow dictionaries"}, - {"parallel_replicas_mark_segment_size", 128, 128, "Add new setting to control segment size in new parallel replicas coordinator implementation"}, - {"ignore_materialized_views_with_dropped_target_table", false, false, "Add new setting to allow to ignore materialized views with dropped target table"}, - {"output_format_compression_level", 3, 3, "Allow to change compression level in the query output"}, - {"output_format_compression_zstd_window_log", 0, 0, "Allow to change zstd window log in the query output when zstd compression is used"}, - {"enable_zstd_qat_codec", false, false, "Add new ZSTD_QAT codec"}, - {"enable_vertical_final", false, true, "Use vertical final by default"}, - {"output_format_arrow_use_64_bit_indexes_for_dictionary", false, false, "Allow to use 64 bit indexes type in Arrow dictionaries"}, - {"max_rows_in_set_to_optimize_join", 100000, 0, "Disable join optimization as it prevents from read in order optimization"}, - {"output_format_pretty_color", true, "auto", "Setting is changed to allow also for auto value, disabling ANSI escapes if output is not a tty"}, - {"function_visible_width_behavior", 0, 1, "We changed the default behavior of `visibleWidth` to be more precise"}, - {"max_estimated_execution_time", 0, 0, "Separate max_execution_time 
and max_estimated_execution_time"}, - {"iceberg_engine_ignore_schema_evolution", false, false, "Allow to ignore schema evolution in Iceberg table engine"}, - {"optimize_injective_functions_in_group_by", false, true, "Replace injective functions by it's arguments in GROUP BY section in analyzer"}, - {"update_insert_deduplication_token_in_dependent_materialized_views", false, false, "Allow to update insert deduplication token with table identifier during insert in dependent materialized views"}, - {"azure_max_unexpected_write_error_retries", 4, 4, "The maximum number of retries in case of unexpected errors during Azure blob storage write"}, - {"split_parts_ranges_into_intersecting_and_non_intersecting_final", false, true, "Allow to split parts ranges into intersecting and non intersecting during FINAL optimization"}, - {"split_intersecting_parts_ranges_into_layers_final", true, true, "Allow to split intersecting parts ranges into layers during FINAL optimization"}}}, - {"23.12", {{"allow_suspicious_ttl_expressions", true, false, "It is a new setting, and in previous versions the behavior was equivalent to allowing."}, - {"input_format_parquet_allow_missing_columns", false, true, "Allow missing columns in Parquet files by default"}, - {"input_format_orc_allow_missing_columns", false, true, "Allow missing columns in ORC files by default"}, - {"input_format_arrow_allow_missing_columns", false, true, "Allow missing columns in Arrow files by default"}}}, - {"23.11", {{"parsedatetime_parse_without_leading_zeros", false, true, "Improved compatibility with MySQL DATE_FORMAT/STR_TO_DATE"}}}, - {"23.9", {{"optimize_group_by_constant_keys", false, true, "Optimize group by constant keys by default"}, - {"input_format_json_try_infer_named_tuples_from_objects", false, true, "Try to infer named Tuples from JSON objects by default"}, - {"input_format_json_read_numbers_as_strings", false, true, "Allow to read numbers as strings in JSON formats by default"}, - {"input_format_json_read_arrays_as_strings", false, true, "Allow to read arrays as strings in JSON formats by default"}, - {"input_format_json_infer_incomplete_types_as_strings", false, true, "Allow to infer incomplete types as Strings in JSON formats by default"}, - {"input_format_json_try_infer_numbers_from_strings", true, false, "Don't infer numbers from strings in JSON formats by default to prevent possible parsing errors"}, - {"http_write_exception_in_output_format", false, true, "Output valid JSON/XML on exception in HTTP streaming."}}}, - {"23.8", {{"rewrite_count_distinct_if_with_count_distinct_implementation", false, true, "Rewrite countDistinctIf with count_distinct_implementation configuration"}}}, - {"23.7", {{"function_sleep_max_microseconds_per_block", 0, 3000000, "In previous versions, the maximum sleep time of 3 seconds was applied only for `sleep`, but not for `sleepEachRow` function. In the new version, we introduce this setting. If you set compatibility with the previous versions, we will disable the limit altogether."}}}, - {"23.6", {{"http_send_timeout", 180, 30, "3 minutes seems crazy long. Note that this is timeout for a single network write call, not for the whole upload operation."}, - {"http_receive_timeout", 180, 30, "See http_send_timeout."}}}, - {"23.5", {{"input_format_parquet_preserve_order", true, false, "Allow Parquet reader to reorder rows for better parallelism."}, - {"parallelize_output_from_storages", false, true, "Allow parallelism when executing queries that read from file/url/s3/etc. 
This may reorder rows."}, - {"use_with_fill_by_sorting_prefix", false, true, "Columns preceding WITH FILL columns in ORDER BY clause form sorting prefix. Rows with different values in sorting prefix are filled independently"}, - {"output_format_parquet_compliant_nested_types", false, true, "Change an internal field name in output Parquet file schema."}}}, - {"23.4", {{"allow_suspicious_indices", true, false, "If true, index can defined with identical expressions"}, - {"allow_nonconst_timezone_arguments", true, false, "Allow non-const timezone arguments in certain time-related functions like toTimeZone(), fromUnixTimestamp*(), snowflakeToDateTime*()."}, - {"connect_timeout_with_failover_ms", 50, 1000, "Increase default connect timeout because of async connect"}, - {"connect_timeout_with_failover_secure_ms", 100, 1000, "Increase default secure connect timeout because of async connect"}, - {"hedged_connection_timeout_ms", 100, 50, "Start new connection in hedged requests after 50 ms instead of 100 to correspond with previous connect timeout"}, - {"formatdatetime_f_prints_single_zero", true, false, "Improved compatibility with MySQL DATE_FORMAT()/STR_TO_DATE()"}, - {"formatdatetime_parsedatetime_m_is_month_name", false, true, "Improved compatibility with MySQL DATE_FORMAT/STR_TO_DATE"}}}, - {"23.3", {{"output_format_parquet_version", "1.0", "2.latest", "Use latest Parquet format version for output format"}, - {"input_format_json_ignore_unknown_keys_in_named_tuple", false, true, "Improve parsing JSON objects as named tuples"}, - {"input_format_native_allow_types_conversion", false, true, "Allow types conversion in Native input forma"}, - {"output_format_arrow_compression_method", "none", "lz4_frame", "Use lz4 compression in Arrow output format by default"}, - {"output_format_parquet_compression_method", "snappy", "lz4", "Use lz4 compression in Parquet output format by default"}, - {"output_format_orc_compression_method", "none", "lz4_frame", "Use lz4 compression in ORC output format by default"}, - {"async_query_sending_for_remote", false, true, "Create connections and send query async across shards"}}}, - {"23.2", {{"output_format_parquet_fixed_string_as_fixed_byte_array", false, true, "Use Parquet FIXED_LENGTH_BYTE_ARRAY type for FixedString by default"}, - {"output_format_arrow_fixed_string_as_fixed_byte_array", false, true, "Use Arrow FIXED_SIZE_BINARY type for FixedString by default"}, - {"query_plan_remove_redundant_distinct", false, true, "Remove redundant Distinct step in query plan"}, - {"optimize_duplicate_order_by_and_distinct", true, false, "Remove duplicate ORDER BY and DISTINCT if it's possible"}, - {"insert_keeper_max_retries", 0, 20, "Enable reconnections to Keeper on INSERT, improve reliability"}}}, - {"23.1", {{"input_format_json_read_objects_as_strings", 0, 1, "Enable reading nested json objects as strings while object type is experimental"}, - {"input_format_json_defaults_for_missing_elements_in_named_tuple", false, true, "Allow missing elements in JSON objects while reading named tuples by default"}, - {"input_format_csv_detect_header", false, true, "Detect header in CSV format by default"}, - {"input_format_tsv_detect_header", false, true, "Detect header in TSV format by default"}, - {"input_format_custom_detect_header", false, true, "Detect header in CustomSeparated format by default"}, - {"query_plan_remove_redundant_sorting", false, true, "Remove redundant sorting in query plan. 
For example, sorting steps related to ORDER BY clauses in subqueries"}}}, - {"22.12", {{"max_size_to_preallocate_for_aggregation", 10'000'000, 100'000'000, "This optimizes performance"}, - {"query_plan_aggregation_in_order", 0, 1, "Enable some refactoring around query plan"}, - {"format_binary_max_string_size", 0, 1_GiB, "Prevent allocating large amount of memory"}}}, - {"22.11", {{"use_structure_from_insertion_table_in_table_functions", 0, 2, "Improve using structure from insertion table in table functions"}}}, - {"22.9", {{"force_grouping_standard_compatibility", false, true, "Make GROUPING function output the same as in SQL standard and other DBMS"}}}, - {"22.7", {{"cross_to_inner_join_rewrite", 1, 2, "Force rewrite comma join to inner"}, - {"enable_positional_arguments", false, true, "Enable positional arguments feature by default"}, - {"format_csv_allow_single_quotes", true, false, "Most tools don't treat single quote in CSV specially, don't do it by default too"}}}, - {"22.6", {{"output_format_json_named_tuples_as_objects", false, true, "Allow to serialize named tuples as JSON objects in JSON formats by default"}, - {"input_format_skip_unknown_fields", false, true, "Optimize reading subset of columns for some input formats"}}}, - {"22.5", {{"memory_overcommit_ratio_denominator", 0, 1073741824, "Enable memory overcommit feature by default"}, - {"memory_overcommit_ratio_denominator_for_user", 0, 1073741824, "Enable memory overcommit feature by default"}}}, - {"22.4", {{"allow_settings_after_format_in_insert", true, false, "Do not allow SETTINGS after FORMAT for INSERT queries because ClickHouse interpret SETTINGS as some values, which is misleading"}}}, - {"22.3", {{"cast_ipv4_ipv6_default_on_conversion_error", true, false, "Make functions cast(value, 'IPv4') and cast(value, 'IPv6') behave same as toIPv4 and toIPv6 functions"}}}, - {"21.12", {{"stream_like_engine_allow_direct_select", true, false, "Do not allow direct select for Kafka/RabbitMQ/FileLog by default"}}}, - {"21.9", {{"output_format_decimal_trailing_zeros", true, false, "Do not output trailing zeros in text representation of Decimal types by default for better looking output"}, - {"use_hedged_requests", false, true, "Enable Hedged Requests feature by default"}}}, - {"21.7", {{"legacy_column_name_of_tuple_literal", true, false, "Add this setting only for compatibility reasons. It makes sense to set to 'true', while doing rolling update of cluster from version lower than 21.7 to higher"}}}, - {"21.5", {{"async_socket_for_remote", false, true, "Fix all problems and turn on asynchronous reads from socket for remote queries by default again"}}}, - {"21.3", {{"async_socket_for_remote", true, false, "Turn off asynchronous reads from socket for remote queries because of some problems"}, - {"optimize_normalize_count_variants", false, true, "Rewrite aggregate functions that semantically equals to count() as count() by default"}, - {"normalize_function_names", false, true, "Normalize function names to their canonical names, this was needed for projection query routing"}}}, - {"21.2", {{"enable_global_with_statement", false, true, "Propagate WITH statements to UNION queries and all subqueries by default"}}}, - {"21.1", {{"insert_quorum_parallel", false, true, "Use parallel quorum inserts by default. 
It is significantly more convenient to use than sequential quorum inserts"}, - {"input_format_null_as_default", false, true, "Allow to insert NULL as default for input formats by default"}, - {"optimize_on_insert", false, true, "Enable data optimization on INSERT by default for better user experience"}, - {"use_compact_format_in_distributed_parts_names", false, true, "Use compact format for async INSERT into Distributed tables by default"}}}, - {"20.10", {{"format_regexp_escaping_rule", "Escaped", "Raw", "Use Raw as default escaping rule for Regexp format to male the behaviour more like to what users expect"}}}, - {"20.7", {{"show_table_uuid_in_table_create_query_if_not_nil", true, false, "Stop showing UID of the table in its CREATE query for Engine=Atomic"}}}, - {"20.5", {{"input_format_with_names_use_header", false, true, "Enable using header with names for formats with WithNames/WithNamesAndTypes suffixes"}, - {"allow_suspicious_codecs", true, false, "Don't allow to specify meaningless compression codecs"}}}, - {"20.4", {{"validate_polygons", false, true, "Throw exception if polygon is invalid in function pointInPolygon by default instead of returning possibly wrong results"}}}, - {"19.18", {{"enable_scalar_subquery_optimization", false, true, "Prevent scalar subqueries from (de)serializing large scalar values and possibly avoid running the same subquery more than once"}}}, - {"19.14", {{"any_join_distinct_right_table_keys", true, false, "Disable ANY RIGHT and ANY FULL JOINs by default to avoid inconsistency"}}}, - {"19.12", {{"input_format_defaults_for_omitted_fields", false, true, "Enable calculation of complex default expressions for omitted fields for some input formats, because it should be the expected behaviour"}}}, - {"19.5", {{"max_partitions_per_insert_block", 0, 100, "Add a limit for the number of partitions in one block"}}}, - {"18.12.17", {{"enable_optimize_predicate_expression", 0, 1, "Optimize predicates to subqueries by default"}}}, }; From 4768e3878552ae0ce9007c1e4f400943a5712825 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Thu, 1 Aug 2024 11:37:42 +0200 Subject: [PATCH 536/661] Update ref to 24.7.2 --- ...{02995_baseline_24_7_1.tsv => 02995_baseline_24_7_2.tsv} | 2 +- tests/queries/0_stateless/02995_new_settings_history.sh | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) rename tests/queries/0_stateless/{02995_baseline_24_7_1.tsv => 02995_baseline_24_7_2.tsv} (99%) diff --git a/tests/queries/0_stateless/02995_baseline_24_7_1.tsv b/tests/queries/0_stateless/02995_baseline_24_7_2.tsv similarity index 99% rename from tests/queries/0_stateless/02995_baseline_24_7_1.tsv rename to tests/queries/0_stateless/02995_baseline_24_7_2.tsv index 6c830da8646..10b392f3e04 100644 --- a/tests/queries/0_stateless/02995_baseline_24_7_1.tsv +++ b/tests/queries/0_stateless/02995_baseline_24_7_2.tsv @@ -462,7 +462,7 @@ input_format_orc_allow_missing_columns 1 input_format_orc_case_insensitive_column_matching 0 input_format_orc_filter_push_down 1 input_format_orc_import_nested 0 -input_format_orc_read_use_writer_time_zone 0 +input_format_orc_reader_time_zone_name GMT input_format_orc_row_batch_size 100000 input_format_orc_skip_columns_with_unsupported_types_in_schema_inference 0 input_format_orc_use_fast_decoder 1 diff --git a/tests/queries/0_stateless/02995_new_settings_history.sh b/tests/queries/0_stateless/02995_new_settings_history.sh index 917dacc04b0..7fb21f88fae 100755 --- a/tests/queries/0_stateless/02995_new_settings_history.sh +++ 
b/tests/queries/0_stateless/02995_new_settings_history.sh @@ -7,12 +7,12 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . "$CUR_DIR"/../shell_config.sh # Note that this is a broad check. A per version check is done in the upgrade test -# Baseline generated with 24.7.1 -# clickhouse local --query "select name, default from system.settings order by name format TSV" > 02995_baseline_24_7_1.tsv +# Baseline generated with 24.7.2 +# clickhouse local --query "select name, default from system.settings order by name format TSV" > 02995_baseline_24_7_2.tsv $CLICKHOUSE_LOCAL --query " WITH old_settings AS ( - SELECT * FROM file('${CUR_DIR}/02995_baseline_24_7_1.tsv', 'TSV', 'name String, default String') + SELECT * FROM file('${CUR_DIR}/02995_baseline_24_7_2.tsv', 'TSV', 'name String, default String') ), new_settings AS ( From da3a37c561679daaecbcdece74f92ce98380b2b5 Mon Sep 17 00:00:00 2001 From: Pablo Marcos Date: Thu, 1 Aug 2024 09:38:27 +0000 Subject: [PATCH 537/661] Improve regex to take into account the xdist name in the instance --- .../test.py | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/tests/integration/test_zookeeper_config_load_balancing/test.py b/tests/integration/test_zookeeper_config_load_balancing/test.py index 9cdf7db2b08..cc0a9022674 100644 --- a/tests/integration/test_zookeeper_config_load_balancing/test.py +++ b/tests/integration/test_zookeeper_config_load_balancing/test.py @@ -71,7 +71,7 @@ def test_first_or_random(started_cluster): [ "bash", "-c", - "lsof -a -i4 -i6 -itcp -w | grep 'testzookeeperconfigloadbalancing_zoo1_1.*testzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l", + "lsof -a -i4 -i6 -itcp -w | grep -P 'testzookeeperconfigloadbalancing_(gw\\d+_)?zoo1_1.*testzookeeperconfigloadbalancing_(gw\\d+_)?default:2181' | grep ESTABLISHED | wc -l", ], privileged=True, user="root", @@ -99,7 +99,7 @@ def test_first_or_random(started_cluster): [ "bash", "-c", - "lsof -a -i4 -i6 -itcp -w | grep 'testzookeeperconfigloadbalancing_zoo1_1.*testzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l", + "lsof -a -i4 -i6 -itcp -w | grep -P 'testzookeeperconfigloadbalancing_(gw\\d+_)?zoo1_1.*testzookeeperconfigloadbalancing_(gw\\d+_)?default:2181' | grep ESTABLISHED | wc -l", ], privileged=True, user="root", @@ -127,7 +127,7 @@ def test_first_or_random(started_cluster): [ "bash", "-c", - "lsof -a -i4 -i6 -itcp -w | grep 'testzookeeperconfigloadbalancing_zoo1_1.*testzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l", + "lsof -a -i4 -i6 -itcp -w | grep -P 'testzookeeperconfigloadbalancing_(gw\\d+_)?zoo1_1.*testzookeeperconfigloadbalancing_(gw\\d+_)?default:2181' | grep ESTABLISHED | wc -l", ], privileged=True, user="root", @@ -161,7 +161,7 @@ def test_in_order(started_cluster): [ "bash", "-c", - "lsof -a -i4 -i6 -itcp -w | grep 'testzookeeperconfigloadbalancing_zoo1_1.*testzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l", + "lsof -a -i4 -i6 -itcp -w | grep -P 'testzookeeperconfigloadbalancing_(gw\\d+_)?zoo1_1.*testzookeeperconfigloadbalancing_(gw\\d+_)?default:2181' | grep ESTABLISHED | wc -l", ], privileged=True, user="root", @@ -189,7 +189,7 @@ def test_in_order(started_cluster): [ "bash", "-c", - "lsof -a -i4 -i6 -itcp -w | grep 'testzookeeperconfigloadbalancing_zoo1_1.*testzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l", + "lsof -a -i4 -i6 -itcp -w | grep -P 
'testzookeeperconfigloadbalancing_(gw\\d+_)?zoo1_1.*testzookeeperconfigloadbalancing_(gw\\d+_)?default:2181' | grep ESTABLISHED | wc -l", ], privileged=True, user="root", @@ -217,7 +217,7 @@ def test_in_order(started_cluster): [ "bash", "-c", - "lsof -a -i4 -i6 -itcp -w | grep 'testzookeeperconfigloadbalancing_zoo1_1.*testzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l", + "lsof -a -i4 -i6 -itcp -w | grep -P 'testzookeeperconfigloadbalancing_(gw\\d+_)?zoo1_1.*testzookeeperconfigloadbalancing_(gw\\d+_)?default:2181' | grep ESTABLISHED | wc -l", ], privileged=True, user="root", @@ -251,7 +251,7 @@ def test_nearest_hostname(started_cluster): [ "bash", "-c", - "lsof -a -i4 -i6 -itcp -w | grep 'testzookeeperconfigloadbalancing_zoo1_1.*testzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l", + "lsof -a -i4 -i6 -itcp -w | grep -P 'testzookeeperconfigloadbalancing_(gw\\d+_)?zoo1_1.*testzookeeperconfigloadbalancing_(gw\\d+_)?default:2181' | grep ESTABLISHED | wc -l", ], privileged=True, user="root", @@ -279,7 +279,7 @@ def test_nearest_hostname(started_cluster): [ "bash", "-c", - "lsof -a -i4 -i6 -itcp -w | grep 'testzookeeperconfigloadbalancing_zoo2_1.*testzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l", + "lsof -a -i4 -i6 -itcp -w | grep -P 'testzookeeperconfigloadbalancing_(gw\\d+_)?zoo2_1.*testzookeeperconfigloadbalancing_(gw\\d+_)?default:2181' | grep ESTABLISHED | wc -l", ], privileged=True, user="root", @@ -307,7 +307,7 @@ def test_nearest_hostname(started_cluster): [ "bash", "-c", - "lsof -a -i4 -i6 -itcp -w | grep 'testzookeeperconfigloadbalancing_zoo3_1.*testzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l", + "lsof -a -i4 -i6 -itcp -w | grep -P 'testzookeeperconfigloadbalancing_(gw\\d+_)?zoo3_1.*testzookeeperconfigloadbalancing_(gw\\d+_)?default:2181' | grep ESTABLISHED | wc -l", ], privileged=True, user="root", @@ -341,7 +341,7 @@ def test_hostname_levenshtein_distance(started_cluster): [ "bash", "-c", - "lsof -a -i4 -i6 -itcp -w | grep 'testzookeeperconfigloadbalancing_zoo1_1.*testzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l", + "lsof -a -i4 -i6 -itcp -w | grep -P 'testzookeeperconfigloadbalancing_(gw\\d+_)?zoo1_1.*testzookeeperconfigloadbalancing_(gw\\d+_)?default:2181' | grep ESTABLISHED | wc -l", ], privileged=True, user="root", @@ -369,7 +369,7 @@ def test_hostname_levenshtein_distance(started_cluster): [ "bash", "-c", - "lsof -a -i4 -i6 -itcp -w | grep 'testzookeeperconfigloadbalancing_zoo2_1.*testzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l", + "lsof -a -i4 -i6 -itcp -w | grep -P 'testzookeeperconfigloadbalancing_(gw\\d+_)?zoo2_1.*testzookeeperconfigloadbalancing_(gw\\d+_)?default:2181' | grep ESTABLISHED | wc -l", ], privileged=True, user="root", @@ -397,7 +397,7 @@ def test_hostname_levenshtein_distance(started_cluster): [ "bash", "-c", - "lsof -a -i4 -i6 -itcp -w | grep 'testzookeeperconfigloadbalancing_zoo3_1.*testzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l", + "lsof -a -i4 -i6 -itcp -w | grep -P 'testzookeeperconfigloadbalancing_(gw\\d+_)?zoo3_1.*testzookeeperconfigloadbalancing_(gw\\d+_)?default:2181' | grep ESTABLISHED | wc -l", ], privileged=True, user="root", From c77f6d78d976430faf4353e350d0205bbecf2837 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Thu, 1 Aug 2024 12:09:58 +0200 Subject: [PATCH 538/661] Update minio --- tests/integration/compose/docker_compose_minio.yml | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/tests/integration/compose/docker_compose_minio.yml b/tests/integration/compose/docker_compose_minio.yml index 4255a529f6d..40098d05b04 100644 --- a/tests/integration/compose/docker_compose_minio.yml +++ b/tests/integration/compose/docker_compose_minio.yml @@ -2,7 +2,7 @@ version: '2.3' services: minio1: - image: minio/minio:RELEASE.2023-09-30T07-02-29Z + image: minio/minio:RELEASE.2024-07-31T05-46-26Z volumes: - data1-1:/data1 - ${MINIO_CERTS_DIR:-}:/certs From 8fc77bec6639c8f0361858d9e031f3cb1175ae30 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Thu, 1 Aug 2024 13:14:14 +0200 Subject: [PATCH 539/661] fix tests --- ...403_enable_extended_results_for_datetime_functions.reference | 2 +- .../0_stateless/03215_toStartOfWeek_with_dateTime64_fix.sql | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/02403_enable_extended_results_for_datetime_functions.reference b/tests/queries/0_stateless/02403_enable_extended_results_for_datetime_functions.reference index c830d790000..147e368b5c9 100644 --- a/tests/queries/0_stateless/02403_enable_extended_results_for_datetime_functions.reference +++ b/tests/queries/0_stateless/02403_enable_extended_results_for_datetime_functions.reference @@ -64,7 +64,7 @@ toStartOfMonth;toDateTime64;false 2099-07-07 type;toStartOfMonth;toDateTime64;false Date toStartOfWeek;toDate32;false 2099-07-07 type;toStartOfWeek;toDate32;false Date -toStartOfWeek;toDateTime64;false 2099-07-07 +toStartOfWeek;toDateTime64;false 1970-01-01 type;toStartOfWeek;toDateTime64;false Date toMonday;toDate32;false 2099-07-08 type;toMonday;toDate32;false Date diff --git a/tests/queries/0_stateless/03215_toStartOfWeek_with_dateTime64_fix.sql b/tests/queries/0_stateless/03215_toStartOfWeek_with_dateTime64_fix.sql index 0f00a52cb86..1769d96aa8d 100644 --- a/tests/queries/0_stateless/03215_toStartOfWeek_with_dateTime64_fix.sql +++ b/tests/queries/0_stateless/03215_toStartOfWeek_with_dateTime64_fix.sql @@ -1,2 +1,2 @@ -SELECT toStartOfWeek(toDateTime64('1970-02-01', 6)); +SELECT toStartOfWeek(toDateTime64('1970-01-01', 6)); SELECT toStartOfWeek(toDateTime('1970-01-01')); From 2c018d6f4d4d6fa1c04e91306b60b6e85d8e468f Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Thu, 1 Aug 2024 13:38:41 +0200 Subject: [PATCH 540/661] Update cluster.py --- tests/integration/helpers/cluster.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index 90b28a4cda3..acf033de46d 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -248,7 +248,7 @@ def check_rabbitmq_is_available(rabbitmq_id, cookie): ), stdout=subprocess.PIPE, ) - p.communicate() + p.wait(timeout=60) return p.returncode == 0 From 029deaeee8431d0ef6f2a460c2bd8631c8025254 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Thu, 1 Aug 2024 14:13:11 +0200 Subject: [PATCH 541/661] Fix 02910_bad_logs_level_in_local in fast tests! 
--- tests/queries/0_stateless/02910_bad_logs_level_in_local.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/02910_bad_logs_level_in_local.sh b/tests/queries/0_stateless/02910_bad_logs_level_in_local.sh index badf7232a95..b5de10bf191 100755 --- a/tests/queries/0_stateless/02910_bad_logs_level_in_local.sh +++ b/tests/queries/0_stateless/02910_bad_logs_level_in_local.sh @@ -1,14 +1,14 @@ #!/usr/bin/expect -f log_user 0 -set timeout 60 +set timeout 30 match_max 100000 spawn bash -c "clickhouse-local" expect ":) " send -- "SET send_logs_level = 't'\r" -expect "Exception on client:" +expect "Unexpected value of LogsLevel:" {} timeout {exit 1} expect ":) " send -- "exit\r" expect eof From bcc75d3681d45b6637211aca0367703b3e957c05 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Thu, 1 Aug 2024 14:38:41 +0200 Subject: [PATCH 542/661] Make 02477_analyzer_function_hints.sh parallelizable --- .../0_stateless/02477_analyzer_function_hints.sh | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/tests/queries/0_stateless/02477_analyzer_function_hints.sh b/tests/queries/0_stateless/02477_analyzer_function_hints.sh index d49c20cab75..f83935e47fb 100755 --- a/tests/queries/0_stateless/02477_analyzer_function_hints.sh +++ b/tests/queries/0_stateless/02477_analyzer_function_hints.sh @@ -1,7 +1,5 @@ #!/usr/bin/env bash -# Tags: no-parallel - set -e CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) @@ -14,11 +12,11 @@ $CLICKHOUSE_CLIENT -q "SELECT plu(1, 1) SETTINGS allow_experimental_analyzer = 1 $CLICKHOUSE_CLIENT -q "SELECT uniqExac(1, 1) SETTINGS allow_experimental_analyzer = 1;" 2>&1 \ | grep "Maybe you meant: \['uniqExact'\]" &>/dev/null; -$CLICKHOUSE_CLIENT -q "DROP FUNCTION IF EXISTS test_user_defined_function;" -$CLICKHOUSE_CLIENT -q "CREATE FUNCTION test_user_defined_function AS x -> x + 1;" -$CLICKHOUSE_CLIENT -q "SELECT test_user_defined_functio(1) SETTINGS allow_experimental_analyzer = 1;" 2>&1 \ - | grep "Maybe you meant: \['test_user_defined_function'\]" &>/dev/null; -$CLICKHOUSE_CLIENT -q "DROP FUNCTION test_user_defined_function"; +$CLICKHOUSE_CLIENT -q "DROP FUNCTION IF EXISTS test_user_defined_function_$CLICKHOUSE_DATABASE;" +$CLICKHOUSE_CLIENT -q "CREATE FUNCTION test_user_defined_function_$CLICKHOUSE_DATABASE AS x -> x + 1;" +$CLICKHOUSE_CLIENT -q "SELECT test_user_defined_function_${CLICKHOUSE_DATABASE}A(1) SETTINGS allow_experimental_analyzer = 1;" 2>&1 \ + | grep -E "Maybe you meant: \[.*'test_user_defined_function_$CLICKHOUSE_DATABASE'.*\]" &>/dev/null; +$CLICKHOUSE_CLIENT -q "DROP FUNCTION test_user_defined_function_$CLICKHOUSE_DATABASE"; $CLICKHOUSE_CLIENT -q "WITH (x -> x + 1) AS lambda_function SELECT lambda_functio(1) SETTINGS allow_experimental_analyzer = 1;" 2>&1 \ | grep "Maybe you meant: \['lambda_function'\]" &>/dev/null; From 008408c81f23ed615cb899048418dc46aa3c2a9f Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Thu, 1 Aug 2024 13:47:34 +0100 Subject: [PATCH 543/661] impl --- .../0_stateless/01605_adaptive_granularity_block_borders.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/01605_adaptive_granularity_block_borders.sql b/tests/queries/0_stateless/01605_adaptive_granularity_block_borders.sql index 9b96ce3e586..aaeee466794 100644 --- a/tests/queries/0_stateless/01605_adaptive_granularity_block_borders.sql +++ b/tests/queries/0_stateless/01605_adaptive_granularity_block_borders.sql @@ -1,4 +1,4 @@ --- Tags: 
no-random-merge-tree-settings, no-tsan, no-debug, no-object-storage +-- Tags: no-random-merge-tree-settings, no-random-settings, no-tsan, no-debug, no-object-storage, long -- no-tsan: too slow -- no-object-storage: for remote tables we use thread pool even when reading with one stream, so memory consumption is higher @@ -16,7 +16,7 @@ CREATE TABLE adaptive_table( value String ) ENGINE MergeTree() ORDER BY key -SETTINGS index_granularity_bytes=1048576, +SETTINGS index_granularity_bytes = 1048576, min_bytes_for_wide_part = 0, min_rows_for_wide_part = 0, enable_vertical_merge_algorithm = 0; From 70228acd7e809230582883a0b6b70c4cd9c04daa Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 1 Aug 2024 15:02:29 +0200 Subject: [PATCH 544/661] Update CHANGELOG.md --- CHANGELOG.md | 1 - 1 file changed, 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a4c873ba3f9..5cd4200d9ad 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,7 +22,6 @@ #### New Feature * Add `ASOF JOIN` support for `full_sorting_join` algorithm. [#55051](https://github.com/ClickHouse/ClickHouse/pull/55051) ([vdimir](https://github.com/vdimir)). -* Add new window function `percent_rank`. [#62747](https://github.com/ClickHouse/ClickHouse/pull/62747) ([lgbo](https://github.com/lgbo-ustc)). * Support JWT authentication in `clickhouse-client` (will be available only in ClickHouse Cloud). [#62829](https://github.com/ClickHouse/ClickHouse/pull/62829) ([Konstantin Bogdanov](https://github.com/thevar1able)). * Add SQL functions `changeYear`, `changeMonth`, `changeDay`, `changeHour`, `changeMinute`, `changeSecond`. For example, `SELECT changeMonth(toDate('2024-06-14'), 7)` returns date `2024-07-14`. [#63186](https://github.com/ClickHouse/ClickHouse/pull/63186) ([cucumber95](https://github.com/cucumber95)). * Introduce startup scripts, which allow the execution of preconfigured queries at the startup stage. [#64889](https://github.com/ClickHouse/ClickHouse/pull/64889) ([pufit](https://github.com/pufit)). 
From 48c6e36dfd23f297907575ce4696f761aec49e11 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Thu, 1 Aug 2024 15:03:41 +0200 Subject: [PATCH 545/661] Make 01062_window_view_event_hop_watch_asc parallelizable --- .../01062_window_view_event_hop_watch_asc.py | 28 ++++++++----------- 1 file changed, 12 insertions(+), 16 deletions(-) diff --git a/tests/queries/0_stateless/01062_window_view_event_hop_watch_asc.py b/tests/queries/0_stateless/01062_window_view_event_hop_watch_asc.py index d6cc3ee1a88..3c85ff30ba8 100755 --- a/tests/queries/0_stateless/01062_window_view_event_hop_watch_asc.py +++ b/tests/queries/0_stateless/01062_window_view_event_hop_watch_asc.py @@ -1,5 +1,4 @@ #!/usr/bin/env python3 -# Tags: no-parallel import os import sys @@ -17,6 +16,7 @@ log = None with client(name="client1>", log=log) as client1, client( name="client2>", log=log ) as client2: + database_name = os.environ["CLICKHOUSE_DATABASE"] client1.expect(prompt) client2.expect(prompt) @@ -31,40 +31,38 @@ with client(name="client1>", log=log) as client1, client( client2.send("SET allow_experimental_analyzer = 0") client2.expect(prompt) - client1.send("CREATE DATABASE IF NOT EXISTS 01062_window_view_event_hop_watch_asc") + client1.send(f"DROP TABLE IF EXISTS {database_name}.mt") client1.expect(prompt) - client1.send("DROP TABLE IF EXISTS 01062_window_view_event_hop_watch_asc.mt") - client1.expect(prompt) - client1.send("DROP TABLE IF EXISTS 01062_window_view_event_hop_watch_asc.wv SYNC") + client1.send(f"DROP TABLE IF EXISTS {database_name}.wv SYNC") client1.expect(prompt) client1.send( - "CREATE TABLE 01062_window_view_event_hop_watch_asc.mt(a Int32, timestamp DateTime) ENGINE=MergeTree ORDER BY tuple()" + f"CREATE TABLE {database_name}.mt(a Int32, timestamp DateTime) ENGINE=MergeTree ORDER BY tuple()" ) client1.expect(prompt) client1.send( - "CREATE WINDOW VIEW 01062_window_view_event_hop_watch_asc.wv ENGINE Memory WATERMARK=ASCENDING AS SELECT count(a) AS count, hopEnd(wid) AS w_end FROM 01062_window_view_event_hop_watch_asc.mt GROUP BY hop(timestamp, INTERVAL '2' SECOND, INTERVAL '3' SECOND, 'US/Samoa') AS wid" + f"CREATE WINDOW VIEW {database_name}.wv ENGINE Memory WATERMARK=ASCENDING AS SELECT count(a) AS count, hopEnd(wid) AS w_end FROM {database_name}.mt GROUP BY hop(timestamp, INTERVAL '2' SECOND, INTERVAL '3' SECOND, 'US/Samoa') AS wid" ) client1.expect(prompt) - client1.send("WATCH 01062_window_view_event_hop_watch_asc.wv") + client1.send(f"WATCH {database_name}.wv") client1.expect("Query id" + end_of_block) client1.expect("Progress: 0.00 rows.*\\)") client2.send( - "INSERT INTO 01062_window_view_event_hop_watch_asc.mt VALUES (1, toDateTime('1990/01/01 12:00:00', 'US/Samoa'));" + f"INSERT INTO {database_name}.mt VALUES (1, toDateTime('1990/01/01 12:00:00', 'US/Samoa'));" ) client2.expect(prompt) client2.send( - "INSERT INTO 01062_window_view_event_hop_watch_asc.mt VALUES (1, toDateTime('1990/01/01 12:00:05', 'US/Samoa'));" + f"INSERT INTO {database_name}.mt VALUES (1, toDateTime('1990/01/01 12:00:05', 'US/Samoa'));" ) client2.expect(prompt) client1.expect("1*" + end_of_block) client2.send( - "INSERT INTO 01062_window_view_event_hop_watch_asc.mt VALUES (1, toDateTime('1990/01/01 12:00:06', 'US/Samoa'));" + f"INSERT INTO {database_name}.mt VALUES (1, toDateTime('1990/01/01 12:00:06', 'US/Samoa'));" ) client2.expect(prompt) client2.send( - "INSERT INTO 01062_window_view_event_hop_watch_asc.mt VALUES (1, toDateTime('1990/01/01 12:00:10', 'US/Samoa'));" + f"INSERT INTO {database_name}.mt 
VALUES (1, toDateTime('1990/01/01 12:00:10', 'US/Samoa'));" ) client2.expect(prompt) client1.expect("1" + end_of_block) @@ -77,9 +75,7 @@ with client(name="client1>", log=log) as client1, client( if match.groups()[1]: client1.send(client1.command) client1.expect(prompt) - client1.send("DROP TABLE 01062_window_view_event_hop_watch_asc.wv SYNC") + client1.send(f"DROP TABLE {database_name}.wv SYNC") client1.expect(prompt) - client1.send("DROP TABLE 01062_window_view_event_hop_watch_asc.mt") - client1.expect(prompt) - client1.send("DROP DATABASE IF EXISTS 01062_window_view_event_hop_watch_asc") + client1.send(f"DROP TABLE {database_name}.mt") client1.expect(prompt) From 7b72362e99a093a4f880d333c1b50cd114b590c0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Thu, 1 Aug 2024 15:06:27 +0200 Subject: [PATCH 546/661] 01493_alter_remove_properties_zookeeper is already parallelizable --- .../0_stateless/01493_alter_remove_properties_zookeeper.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01493_alter_remove_properties_zookeeper.sql b/tests/queries/0_stateless/01493_alter_remove_properties_zookeeper.sql index 92e6fce2c93..362da3ac364 100644 --- a/tests/queries/0_stateless/01493_alter_remove_properties_zookeeper.sql +++ b/tests/queries/0_stateless/01493_alter_remove_properties_zookeeper.sql @@ -1,4 +1,4 @@ --- Tags: zookeeper, no-parallel +-- Tags: zookeeper DROP TABLE IF EXISTS r_prop_table1; DROP TABLE IF EXISTS r_prop_table2; From 56e48cf43b4d13810e2bb4b4e941954b654a1cb3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Thu, 1 Aug 2024 15:09:56 +0200 Subject: [PATCH 547/661] Make 01676_range_hashed_dictionary parallelizable --- .../01676_range_hashed_dictionary.sql | 92 +++++++++---------- 1 file changed, 42 insertions(+), 50 deletions(-) diff --git a/tests/queries/0_stateless/01676_range_hashed_dictionary.sql b/tests/queries/0_stateless/01676_range_hashed_dictionary.sql index 430f3a86dc1..ba2a9eba87f 100644 --- a/tests/queries/0_stateless/01676_range_hashed_dictionary.sql +++ b/tests/queries/0_stateless/01676_range_hashed_dictionary.sql @@ -1,10 +1,4 @@ --- Tags: no-parallel - -DROP DATABASE IF EXISTS database_for_range_dict; - -CREATE DATABASE database_for_range_dict; - -CREATE TABLE database_for_range_dict.date_table +CREATE TABLE date_table ( CountryID UInt64, StartDate Date, @@ -14,11 +8,11 @@ CREATE TABLE database_for_range_dict.date_table ENGINE = MergeTree() ORDER BY CountryID; -INSERT INTO database_for_range_dict.date_table VALUES(1, toDate('2019-05-05'), toDate('2019-05-20'), 0.33); -INSERT INTO database_for_range_dict.date_table VALUES(1, toDate('2019-05-21'), toDate('2019-05-30'), 0.42); -INSERT INTO database_for_range_dict.date_table VALUES(2, toDate('2019-05-21'), toDate('2019-05-30'), 0.46); +INSERT INTO date_table VALUES(1, toDate('2019-05-05'), toDate('2019-05-20'), 0.33); +INSERT INTO date_table VALUES(1, toDate('2019-05-21'), toDate('2019-05-30'), 0.42); +INSERT INTO date_table VALUES(2, toDate('2019-05-21'), toDate('2019-05-30'), 0.46); -CREATE DICTIONARY database_for_range_dict.range_dictionary +CREATE DICTIONARY range_dictionary ( CountryID UInt64, StartDate Date, @@ -26,7 +20,7 @@ CREATE DICTIONARY database_for_range_dict.range_dictionary Tax Float64 DEFAULT 0.2 ) PRIMARY KEY CountryID -SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'date_table' DB 'database_for_range_dict')) +SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'date_table' 
DB currentDatabase())) LIFETIME(MIN 1 MAX 1000) LAYOUT(RANGE_HASHED()) RANGE(MIN StartDate MAX EndDate) @@ -35,30 +29,30 @@ SETTINGS(dictionary_use_async_executor=1, max_threads=8) SELECT 'Dictionary not nullable'; SELECT 'dictGet'; -SELECT dictGet('database_for_range_dict.range_dictionary', 'Tax', toUInt64(1), toDate('2019-05-15')); -SELECT dictGet('database_for_range_dict.range_dictionary', 'Tax', toUInt64(1), toDate('2019-05-29')); -SELECT dictGet('database_for_range_dict.range_dictionary', 'Tax', toUInt64(2), toDate('2019-05-29')); -SELECT dictGet('database_for_range_dict.range_dictionary', 'Tax', toUInt64(2), toDate('2019-05-31')); -SELECT dictGetOrDefault('database_for_range_dict.range_dictionary', 'Tax', toUInt64(2), toDate('2019-05-31'), 0.4); +SELECT dictGet('range_dictionary', 'Tax', toUInt64(1), toDate('2019-05-15')); +SELECT dictGet('range_dictionary', 'Tax', toUInt64(1), toDate('2019-05-29')); +SELECT dictGet('range_dictionary', 'Tax', toUInt64(2), toDate('2019-05-29')); +SELECT dictGet('range_dictionary', 'Tax', toUInt64(2), toDate('2019-05-31')); +SELECT dictGetOrDefault('range_dictionary', 'Tax', toUInt64(2), toDate('2019-05-31'), 0.4); SELECT 'dictHas'; -SELECT dictHas('database_for_range_dict.range_dictionary', toUInt64(1), toDate('2019-05-15')); -SELECT dictHas('database_for_range_dict.range_dictionary', toUInt64(1), toDate('2019-05-29')); -SELECT dictHas('database_for_range_dict.range_dictionary', toUInt64(2), toDate('2019-05-29')); -SELECT dictHas('database_for_range_dict.range_dictionary', toUInt64(2), toDate('2019-05-31')); +SELECT dictHas('range_dictionary', toUInt64(1), toDate('2019-05-15')); +SELECT dictHas('range_dictionary', toUInt64(1), toDate('2019-05-29')); +SELECT dictHas('range_dictionary', toUInt64(2), toDate('2019-05-29')); +SELECT dictHas('range_dictionary', toUInt64(2), toDate('2019-05-31')); SELECT 'select columns from dictionary'; SELECT 'allColumns'; -SELECT * FROM database_for_range_dict.range_dictionary ORDER BY CountryID, StartDate, EndDate; +SELECT * FROM range_dictionary ORDER BY CountryID, StartDate, EndDate; SELECT 'noColumns'; -SELECT 1 FROM database_for_range_dict.range_dictionary ORDER BY CountryID, StartDate, EndDate; +SELECT 1 FROM range_dictionary ORDER BY CountryID, StartDate, EndDate; SELECT 'onlySpecificColumns'; -SELECT CountryID, StartDate, Tax FROM database_for_range_dict.range_dictionary ORDER BY CountryID, StartDate, EndDate; +SELECT CountryID, StartDate, Tax FROM range_dictionary ORDER BY CountryID, StartDate, EndDate; SELECT 'onlySpecificColumn'; -SELECT Tax FROM database_for_range_dict.range_dictionary ORDER BY CountryID, StartDate, EndDate; +SELECT Tax FROM range_dictionary ORDER BY CountryID, StartDate, EndDate; -DROP DICTIONARY database_for_range_dict.range_dictionary; -DROP TABLE database_for_range_dict.date_table; +DROP DICTIONARY range_dictionary; +DROP TABLE date_table; -CREATE TABLE database_for_range_dict.date_table +CREATE TABLE date_table ( CountryID UInt64, StartDate Date, @@ -68,11 +62,11 @@ CREATE TABLE database_for_range_dict.date_table ENGINE = MergeTree() ORDER BY CountryID; -INSERT INTO database_for_range_dict.date_table VALUES(1, toDate('2019-05-05'), toDate('2019-05-20'), 0.33); -INSERT INTO database_for_range_dict.date_table VALUES(1, toDate('2019-05-21'), toDate('2019-05-30'), 0.42); -INSERT INTO database_for_range_dict.date_table VALUES(2, toDate('2019-05-21'), toDate('2019-05-30'), NULL); +INSERT INTO date_table VALUES(1, toDate('2019-05-05'), toDate('2019-05-20'), 0.33); +INSERT INTO date_table 
VALUES(1, toDate('2019-05-21'), toDate('2019-05-30'), 0.42); +INSERT INTO date_table VALUES(2, toDate('2019-05-21'), toDate('2019-05-30'), NULL); -CREATE DICTIONARY database_for_range_dict.range_dictionary_nullable +CREATE DICTIONARY range_dictionary_nullable ( CountryID UInt64, StartDate Date, @@ -80,35 +74,33 @@ CREATE DICTIONARY database_for_range_dict.range_dictionary_nullable Tax Nullable(Float64) DEFAULT 0.2 ) PRIMARY KEY CountryID -SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'date_table' DB 'database_for_range_dict')) +SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'date_table' DB currentDatabase())) LIFETIME(MIN 1 MAX 1000) LAYOUT(RANGE_HASHED()) RANGE(MIN StartDate MAX EndDate); SELECT 'Dictionary nullable'; SELECT 'dictGet'; -SELECT dictGet('database_for_range_dict.range_dictionary_nullable', 'Tax', toUInt64(1), toDate('2019-05-15')); -SELECT dictGet('database_for_range_dict.range_dictionary_nullable', 'Tax', toUInt64(1), toDate('2019-05-29')); -SELECT dictGet('database_for_range_dict.range_dictionary_nullable', 'Tax', toUInt64(2), toDate('2019-05-29')); -SELECT dictGet('database_for_range_dict.range_dictionary_nullable', 'Tax', toUInt64(2), toDate('2019-05-31')); -SELECT dictGetOrDefault('database_for_range_dict.range_dictionary_nullable', 'Tax', toUInt64(2), toDate('2019-05-31'), 0.4); +SELECT dictGet('range_dictionary_nullable', 'Tax', toUInt64(1), toDate('2019-05-15')); +SELECT dictGet('range_dictionary_nullable', 'Tax', toUInt64(1), toDate('2019-05-29')); +SELECT dictGet('range_dictionary_nullable', 'Tax', toUInt64(2), toDate('2019-05-29')); +SELECT dictGet('range_dictionary_nullable', 'Tax', toUInt64(2), toDate('2019-05-31')); +SELECT dictGetOrDefault('range_dictionary_nullable', 'Tax', toUInt64(2), toDate('2019-05-31'), 0.4); SELECT 'dictHas'; -SELECT dictHas('database_for_range_dict.range_dictionary_nullable', toUInt64(1), toDate('2019-05-15')); -SELECT dictHas('database_for_range_dict.range_dictionary_nullable', toUInt64(1), toDate('2019-05-29')); -SELECT dictHas('database_for_range_dict.range_dictionary_nullable', toUInt64(2), toDate('2019-05-29')); -SELECT dictHas('database_for_range_dict.range_dictionary_nullable', toUInt64(2), toDate('2019-05-31')); +SELECT dictHas('range_dictionary_nullable', toUInt64(1), toDate('2019-05-15')); +SELECT dictHas('range_dictionary_nullable', toUInt64(1), toDate('2019-05-29')); +SELECT dictHas('range_dictionary_nullable', toUInt64(2), toDate('2019-05-29')); +SELECT dictHas('range_dictionary_nullable', toUInt64(2), toDate('2019-05-31')); SELECT 'select columns from dictionary'; SELECT 'allColumns'; -SELECT * FROM database_for_range_dict.range_dictionary_nullable ORDER BY CountryID, StartDate, EndDate; +SELECT * FROM range_dictionary_nullable ORDER BY CountryID, StartDate, EndDate; SELECT 'noColumns'; -SELECT 1 FROM database_for_range_dict.range_dictionary_nullable ORDER BY CountryID, StartDate, EndDate; +SELECT 1 FROM range_dictionary_nullable ORDER BY CountryID, StartDate, EndDate; SELECT 'onlySpecificColumns'; -SELECT CountryID, StartDate, Tax FROM database_for_range_dict.range_dictionary_nullable ORDER BY CountryID, StartDate, EndDate; +SELECT CountryID, StartDate, Tax FROM range_dictionary_nullable ORDER BY CountryID, StartDate, EndDate; SELECT 'onlySpecificColumn'; -SELECT Tax FROM database_for_range_dict.range_dictionary_nullable ORDER BY CountryID, StartDate, EndDate; +SELECT Tax FROM range_dictionary_nullable ORDER BY CountryID, StartDate, EndDate; -DROP DICTIONARY 
database_for_range_dict.range_dictionary_nullable; -DROP TABLE database_for_range_dict.date_table; - -DROP DATABASE database_for_range_dict; +DROP DICTIONARY range_dictionary_nullable; +DROP TABLE date_table; From bb7039eeec01ce59008103727f8c03ddd26a3d29 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Thu, 1 Aug 2024 15:17:00 +0200 Subject: [PATCH 548/661] Make 01107_atomic_db_detach_attach parallelizable --- .../01107_atomic_db_detach_attach.sh | 31 ++++++++++--------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/tests/queries/0_stateless/01107_atomic_db_detach_attach.sh b/tests/queries/0_stateless/01107_atomic_db_detach_attach.sh index bcaa70abbb5..e9879344259 100755 --- a/tests/queries/0_stateless/01107_atomic_db_detach_attach.sh +++ b/tests/queries/0_stateless/01107_atomic_db_detach_attach.sh @@ -1,29 +1,30 @@ #!/usr/bin/env bash -# Tags: no-parallel, no-fasttest +# Tags: no-fasttest CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -$CLICKHOUSE_CLIENT -q "DROP DATABASE IF EXISTS test_01107" -$CLICKHOUSE_CLIENT -q "CREATE DATABASE test_01107 ENGINE=Atomic" -$CLICKHOUSE_CLIENT -q "CREATE TABLE test_01107.mt (n UInt64) ENGINE=MergeTree() ORDER BY tuple()" +NEW_DATABASE=test_01107_${CLICKHOUSE_DATABASE} +$CLICKHOUSE_CLIENT -q "DROP DATABASE IF EXISTS ${NEW_DATABASE}" +$CLICKHOUSE_CLIENT -q "CREATE DATABASE ${NEW_DATABASE} ENGINE=Atomic" +$CLICKHOUSE_CLIENT -q "CREATE TABLE ${NEW_DATABASE}.mt (n UInt64) ENGINE=MergeTree() ORDER BY tuple()" -$CLICKHOUSE_CLIENT --function_sleep_max_microseconds_per_block 60000000 -q "INSERT INTO test_01107.mt SELECT number + sleepEachRow(3) FROM numbers(5)" & +$CLICKHOUSE_CLIENT --function_sleep_max_microseconds_per_block 60000000 -q "INSERT INTO ${NEW_DATABASE}.mt SELECT number + sleepEachRow(3) FROM numbers(5)" & sleep 1 -$CLICKHOUSE_CLIENT -q "DETACH TABLE test_01107.mt" --database_atomic_wait_for_drop_and_detach_synchronously=0 -$CLICKHOUSE_CLIENT -q "ATTACH TABLE test_01107.mt" --database_atomic_wait_for_drop_and_detach_synchronously=0 2>&1 | grep -F "Code: 57" > /dev/null && echo "OK" -$CLICKHOUSE_CLIENT -q "DETACH DATABASE test_01107" --database_atomic_wait_for_drop_and_detach_synchronously=0 2>&1 | grep -F "Code: 219" > /dev/null && echo "OK" +$CLICKHOUSE_CLIENT -q "DETACH TABLE ${NEW_DATABASE}.mt" --database_atomic_wait_for_drop_and_detach_synchronously=0 +$CLICKHOUSE_CLIENT -q "ATTACH TABLE ${NEW_DATABASE}.mt" --database_atomic_wait_for_drop_and_detach_synchronously=0 2>&1 | grep -F "Code: 57" > /dev/null && echo "OK" +$CLICKHOUSE_CLIENT -q "DETACH DATABASE ${NEW_DATABASE}" --database_atomic_wait_for_drop_and_detach_synchronously=0 2>&1 | grep -F "Code: 219" > /dev/null && echo "OK" wait -$CLICKHOUSE_CLIENT -q "ATTACH TABLE test_01107.mt" -$CLICKHOUSE_CLIENT -q "SELECT count(n), sum(n) FROM test_01107.mt" -$CLICKHOUSE_CLIENT -q "DETACH DATABASE test_01107" --database_atomic_wait_for_drop_and_detach_synchronously=0 -$CLICKHOUSE_CLIENT -q "ATTACH DATABASE test_01107" -$CLICKHOUSE_CLIENT -q "SELECT count(n), sum(n) FROM test_01107.mt" +$CLICKHOUSE_CLIENT -q "ATTACH TABLE ${NEW_DATABASE}.mt" +$CLICKHOUSE_CLIENT -q "SELECT count(n), sum(n) FROM ${NEW_DATABASE}.mt" +$CLICKHOUSE_CLIENT -q "DETACH DATABASE ${NEW_DATABASE}" --database_atomic_wait_for_drop_and_detach_synchronously=0 +$CLICKHOUSE_CLIENT -q "ATTACH DATABASE ${NEW_DATABASE}" +$CLICKHOUSE_CLIENT -q "SELECT count(n), sum(n) FROM ${NEW_DATABASE}.mt" -$CLICKHOUSE_CLIENT 
--function_sleep_max_microseconds_per_block 60000000 -q "INSERT INTO test_01107.mt SELECT number + sleepEachRow(1) FROM numbers(5)" && echo "end" & +$CLICKHOUSE_CLIENT --function_sleep_max_microseconds_per_block 60000000 -q "INSERT INTO ${NEW_DATABASE}.mt SELECT number + sleepEachRow(1) FROM numbers(5)" && echo "end" & sleep 1 -$CLICKHOUSE_CLIENT -q "DROP DATABASE test_01107" --database_atomic_wait_for_drop_and_detach_synchronously=0 && sleep 1 && echo "dropped" +$CLICKHOUSE_CLIENT -q "DROP DATABASE ${NEW_DATABASE}" --database_atomic_wait_for_drop_and_detach_synchronously=0 && sleep 1 && echo "dropped" wait From 1d85f9b1cba3c8fe168286a660d3c0a4fd471a95 Mon Sep 17 00:00:00 2001 From: Michael Stetsyuk Date: Thu, 1 Aug 2024 14:42:58 +0100 Subject: [PATCH 549/661] fix remove_local_directory_contents --- tests/integration/test_storage_delta/test.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/tests/integration/test_storage_delta/test.py b/tests/integration/test_storage_delta/test.py index e485bc90ee0..384b8296f66 100644 --- a/tests/integration/test_storage_delta/test.py +++ b/tests/integration/test_storage_delta/test.py @@ -52,9 +52,13 @@ def get_spark(): return builder.master("local").getOrCreate() -def remove_local_directory_contents(local_path): - for local_file in glob.glob(local_path + "/**"): - os.unlink(local_file) +def remove_local_directory_contents(full_path): + for path in glob.glob(f"{full_path}/**"): + if os.path.isfile(path): + os.unlink(path) + else: + remove_local_directory_contents(path) + os.rmdir(path) @pytest.fixture(scope="module") From 0913f0189ba350236d32d774265770b654860a80 Mon Sep 17 00:00:00 2001 From: Alex Katsman Date: Wed, 31 Jul 2024 09:06:30 +0000 Subject: [PATCH 550/661] Don't count a search query as a search pattern match --- tests/integration/helpers/cluster.py | 10 +++++++--- .../integration/test_mask_sensitive_info/test.py | 15 +++++++-------- 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index 90b28a4cda3..6bc0ece63ca 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -3922,7 +3922,11 @@ class ClickHouseInstance: ) def contains_in_log( - self, substring, from_host=False, filename="clickhouse-server.log" + self, + substring, + from_host=False, + filename="clickhouse-server.log", + exclusion_substring="", ): if from_host: # We check fist file exists but want to look for all rotated logs as well @@ -3930,7 +3934,7 @@ class ClickHouseInstance: [ "bash", "-c", - f'[ -f {self.logs_dir}/{filename} ] && zgrep -aH "{substring}" {self.logs_dir}/{filename}* || true', + f'[ -f {self.logs_dir}/{filename} ] && zgrep -aH "{substring}" {self.logs_dir}/{filename}* | ( [ -z "{exclusion_substring}" ] && cat || grep -v "${exclusion_substring}" ) || true', ] ) else: @@ -3938,7 +3942,7 @@ class ClickHouseInstance: [ "bash", "-c", - f'[ -f /var/log/clickhouse-server/{filename} ] && zgrep -aH "{substring}" /var/log/clickhouse-server/{filename} || true', + f'[ -f /var/log/clickhouse-server/{filename} ] && zgrep -aH "{substring}" /var/log/clickhouse-server/{filename} | ( [ -z "{exclusion_substring}" ] && cat || grep -v "${exclusion_substring}" ) || true', ] ) return len(result) > 0 diff --git a/tests/integration/test_mask_sensitive_info/test.py b/tests/integration/test_mask_sensitive_info/test.py index 902d3800324..6f6dc4d287f 100644 --- a/tests/integration/test_mask_sensitive_info/test.py +++ 
b/tests/integration/test_mask_sensitive_info/test.py @@ -13,6 +13,7 @@ node = cluster.add_instance( with_zookeeper=True, with_azurite=True, ) +base_search_query = "SELECT COUNT() FROM system.query_log WHERE query LIKE " @pytest.fixture(scope="module", autouse=True) @@ -35,7 +36,7 @@ def check_logs(must_contain=[], must_not_contain=[]): .replace("]", "\\]") .replace("*", "\\*") ) - assert node.contains_in_log(escaped_str) + assert node.contains_in_log(escaped_str, exclusion_substring=base_search_query) for str in must_not_contain: escaped_str = ( @@ -44,7 +45,9 @@ def check_logs(must_contain=[], must_not_contain=[]): .replace("]", "\\]") .replace("*", "\\*") ) - assert not node.contains_in_log(escaped_str) + assert not node.contains_in_log( + escaped_str, exclusion_substring=base_search_query + ) for str in must_contain: escaped_str = str.replace("'", "\\'") @@ -60,7 +63,7 @@ def system_query_log_contains_search_pattern(search_pattern): return ( int( node.query( - f"SELECT COUNT() FROM system.query_log WHERE query LIKE '%{search_pattern}%'" + f"{base_search_query}'%{search_pattern}%' AND query NOT LIKE '{base_search_query}%'" ).strip() ) >= 1 @@ -105,7 +108,6 @@ def test_create_alter_user(): must_not_contain=[ password, "IDENTIFIED BY", - "IDENTIFIED BY", "IDENTIFIED WITH plaintext_password BY", ], ) @@ -366,10 +368,7 @@ def test_table_functions(): f"remoteSecure(named_collection_6, addresses_expr = '127.{{2..11}}', database = 'default', table = 'remote_table', user = 'remote_user', password = '{password}')", f"s3('http://minio1:9001/root/data/test9.csv.gz', 'NOSIGN', 'CSV')", f"s3('http://minio1:9001/root/data/test10.csv.gz', 'minio', '{password}')", - ( - f"deltaLake('http://minio1:9001/root/data/test11.csv.gz', 'minio', '{password}')", - "DNS_ERROR", - ), + f"deltaLake('http://minio1:9001/root/data/test11.csv.gz', 'minio', '{password}')", f"azureBlobStorage('{azure_conn_string}', 'cont', 'test_simple.csv', 'CSV')", f"azureBlobStorage('{azure_conn_string}', 'cont', 'test_simple_1.csv', 'CSV', 'none')", f"azureBlobStorage('{azure_conn_string}', 'cont', 'test_simple_2.csv', 'CSV', 'none', 'auto')", From 048fbacc40062b05510916134c9d9525e7fab63a Mon Sep 17 00:00:00 2001 From: Tyler Hannan Date: Thu, 1 Aug 2024 16:48:19 +0200 Subject: [PATCH 551/661] Update README.md --- README.md | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 98f9108f14c..2120a4d1211 100644 --- a/README.md +++ b/README.md @@ -34,17 +34,13 @@ curl https://clickhouse.com/ | sh Every month we get together with the community (users, contributors, customers, those interested in learning more about ClickHouse) to discuss what is coming in the latest release. If you are interested in sharing what you've built on ClickHouse, let us know. -* [v24.7 Community Call](https://clickhouse.com/company/events/v24-7-community-release-call) - Jul 30 +* [v24.8 Community Call](https://clickhouse.com/company/events/v24-8-community-release-call) - August 29 ## Upcoming Events Keep an eye out for upcoming meetups and events around the world. Somewhere else you want us to be? Please feel free to reach out to tyler `` clickhouse `` com. You can also peruse [ClickHouse Events](https://clickhouse.com/company/news-events) for a list of all upcoming trainings, meetups, speaking engagements, etc. 
-* [ClickHouse Meetup in Paris](https://www.meetup.com/clickhouse-france-user-group/events/300783448/) - Jul 9 -* [ClickHouse Cloud - Live Update Call](https://clickhouse.com/company/events/202407-cloud-update-live) - Jul 9 -* [ClickHouse Meetup @ Ramp - New York City](https://www.meetup.com/clickhouse-new-york-user-group/events/300595845/) - Jul 9 -* [AWS Summit in New York](https://clickhouse.com/company/events/2024-07-awssummit-nyc) - Jul 10 -* [ClickHouse Meetup @ Klaviyo - Boston](https://www.meetup.com/clickhouse-boston-user-group/events/300907870) - Jul 11 +* MORE COMING SOON! ## Recent Recordings * **Recent Meetup Videos**: [Meetup Playlist](https://www.youtube.com/playlist?list=PL0Z2YDlm0b3iNDUzpY1S3L_iV4nARda_U) Whenever possible recordings of the ClickHouse Community Meetups are edited and presented as individual talks. Current featuring "Modern SQL in 2023", "Fast, Concurrent, and Consistent Asynchronous INSERTS in ClickHouse", and "Full-Text Indices: Design and Experiments" From 582bcfdc03e1124500325c3104497719473657cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Thu, 1 Aug 2024 16:57:41 +0200 Subject: [PATCH 552/661] Add no-parallel back to 01107_atomic_db_detach_attach --- tests/queries/0_stateless/01107_atomic_db_detach_attach.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01107_atomic_db_detach_attach.sh b/tests/queries/0_stateless/01107_atomic_db_detach_attach.sh index e9879344259..a6a99aadac2 100755 --- a/tests/queries/0_stateless/01107_atomic_db_detach_attach.sh +++ b/tests/queries/0_stateless/01107_atomic_db_detach_attach.sh @@ -1,5 +1,6 @@ #!/usr/bin/env bash -# Tags: no-fasttest +# Tags: no-fasttest, no-parallel +# no-parallel: FIXME: Timing issues with INSERT + DETACH (https://github.com/ClickHouse/ClickHouse/pull/67610/files#r1700345054) CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh From 0978441a0261c6003c7a9f4661ac87138e909622 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 31 Jul 2024 17:38:20 +0200 Subject: [PATCH 553/661] Minor change --- src/Databases/DatabaseOnDisk.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Databases/DatabaseOnDisk.cpp b/src/Databases/DatabaseOnDisk.cpp index f419f5811a1..734f354d9a5 100644 --- a/src/Databases/DatabaseOnDisk.cpp +++ b/src/Databases/DatabaseOnDisk.cpp @@ -313,7 +313,7 @@ void DatabaseOnDisk::detachTablePermanently(ContextPtr query_context, const Stri std::lock_guard lock(mutex); if (const auto it = snapshot_detached_tables.find(table_name); it == snapshot_detached_tables.end()) { - throw Exception(ErrorCodes::LOGICAL_ERROR, "Snapshot doesn't contain info about detached table={}", table_name); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Snapshot doesn't contain info about detached table `{}`", table_name); } else { From 53cbb4811047cad2bdf2b882bc89ff9a83ac4577 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Thu, 1 Aug 2024 18:06:05 +0200 Subject: [PATCH 554/661] Try fix 03143_asof_join_ddb_long --- tests/queries/0_stateless/03143_asof_join_ddb_long.sql | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/03143_asof_join_ddb_long.sql b/tests/queries/0_stateless/03143_asof_join_ddb_long.sql index 17a67511030..0b17ade5d1c 100644 --- a/tests/queries/0_stateless/03143_asof_join_ddb_long.sql +++ b/tests/queries/0_stateless/03143_asof_join_ddb_long.sql @@ -1,4 +1,5 @@ --- Tags: long +-- Tags: long, 
no-random-merge-tree-settings +-- no-random-merge-tree-settings - times out in private DROP TABLE IF EXISTS build; DROP TABLE IF EXISTS skewed_probe; From 9362d1a5668bcd6e4e629ab26ec44d4bc8cb6513 Mon Sep 17 00:00:00 2001 From: Max K Date: Wed, 31 Jul 2024 15:04:13 +0200 Subject: [PATCH 555/661] CI: Create release workflow dry run fix fix --- .github/actions/release/action.yml | 30 +++--- .github/workflows/create_release.yml | 144 +++++++++++++++++++++++++-- tests/ci/create_release.py | 2 +- 3 files changed, 153 insertions(+), 23 deletions(-) diff --git a/.github/actions/release/action.yml b/.github/actions/release/action.yml index c3897682a33..a287aa8b41d 100644 --- a/.github/actions/release/action.yml +++ b/.github/actions/release/action.yml @@ -16,8 +16,7 @@ inputs: - new dry-run: description: 'Dry run' - required: false - default: true + required: true type: boolean token: required: true @@ -30,8 +29,7 @@ runs: shell: bash run: | python3 ./tests/ci/create_release.py --prepare-release-info \ - --ref ${{ inputs.ref }} --release-type ${{ inputs.type }} \ - ${{ inputs.dry-run && '--dry-run' || '' }} + --ref ${{ inputs.ref }} --release-type ${{ inputs.type }} ${{ inputs.dry-run == true && '--dry-run' || '' }} echo "::group::Release Info" python3 -m json.tool /tmp/release_info.json echo "::endgroup::" @@ -44,20 +42,20 @@ runs: if: ${{ inputs.type == 'patch' }} shell: bash run: | - python3 ./tests/ci/create_release.py --download-packages ${{ inputs.dry-run && '--dry-run' || '' }} + python3 ./tests/ci/create_release.py --download-packages ${{ inputs.dry-run == true && '--dry-run' || '' }} - name: Push Git Tag for the Release shell: bash run: | - python3 ./tests/ci/create_release.py --push-release-tag ${{ inputs.dry-run && '--dry-run' || '' }} + python3 ./tests/ci/create_release.py --push-release-tag ${{ inputs.dry-run == true && '--dry-run' || '' }} - name: Push New Release Branch if: ${{ inputs.type == 'new' }} shell: bash run: | - python3 ./tests/ci/create_release.py --push-new-release-branch ${{ inputs.dry-run && '--dry-run' || '' }} + python3 ./tests/ci/create_release.py --push-new-release-branch ${{ inputs.dry-run == true && '--dry-run' || '' }} - name: Bump CH Version and Update Contributors' List shell: bash run: | - python3 ./tests/ci/create_release.py --create-bump-version-pr ${{ inputs.dry-run && '--dry-run' || '' }} + python3 ./tests/ci/create_release.py --create-bump-version-pr ${{ inputs.dry-run == true && '--dry-run' || '' }} - name: Bump Docker versions, Changelog, Security if: ${{ inputs.type == 'patch' }} shell: bash @@ -107,37 +105,37 @@ runs: shell: bash if: ${{ inputs.type == 'patch' }} run: | - python3 ./tests/ci/create_release.py --create-gh-release ${{ inputs.dry-run && '--dry-run' || '' }} + python3 ./tests/ci/create_release.py --create-gh-release ${{ inputs.dry-run == true && '--dry-run' || '' }} - name: Export TGZ Packages if: ${{ inputs.type == 'patch' }} shell: bash run: | - python3 ./tests/ci/artifactory.py --export-tgz ${{ inputs.dry-run && '--dry-run' || '' }} + python3 ./tests/ci/artifactory.py --export-tgz ${{ inputs.dry-run == true && '--dry-run' || '' }} - name: Test TGZ Packages if: ${{ inputs.type == 'patch' }} shell: bash run: | - python3 ./tests/ci/artifactory.py --test-tgz ${{ inputs.dry-run && '--dry-run' || '' }} + python3 ./tests/ci/artifactory.py --test-tgz ${{ inputs.dry-run == true && '--dry-run' || '' }} - name: Export RPM Packages if: ${{ inputs.type == 'patch' }} shell: bash run: | - python3 ./tests/ci/artifactory.py --export-rpm ${{ 
inputs.dry-run && '--dry-run' || '' }} + python3 ./tests/ci/artifactory.py --export-rpm ${{ inputs.dry-run == true && '--dry-run' || '' }} - name: Test RPM Packages if: ${{ inputs.type == 'patch' }} shell: bash run: | - python3 ./tests/ci/artifactory.py --test-rpm ${{ inputs.dry-run && '--dry-run' || '' }} + python3 ./tests/ci/artifactory.py --test-rpm ${{ inputs.dry-run == true && '--dry-run' || '' }} - name: Export Debian Packages if: ${{ inputs.type == 'patch' }} shell: bash run: | - python3 ./tests/ci/artifactory.py --export-debian ${{ inputs.dry-run && '--dry-run' || '' }} + python3 ./tests/ci/artifactory.py --export-debian ${{ inputs.dry-run == true && '--dry-run' || '' }} - name: Test Debian Packages if: ${{ inputs.type == 'patch' }} shell: bash run: | - python3 ./tests/ci/artifactory.py --test-debian ${{ inputs.dry-run && '--dry-run' || '' }} + python3 ./tests/ci/artifactory.py --test-debian ${{ inputs.dry-run == true && '--dry-run' || '' }} - name: Docker clickhouse/clickhouse-server building if: ${{ inputs.type == 'patch' }} shell: bash @@ -165,4 +163,4 @@ runs: if: ${{ !cancelled() }} shell: bash run: | - python3 ./tests/ci/create_release.py --post-status ${{ inputs.dry-run && '--dry-run' || '' }} + python3 ./tests/ci/create_release.py --post-status ${{ inputs.dry-run == true && '--dry-run' || '' }} diff --git a/.github/workflows/create_release.yml b/.github/workflows/create_release.yml index 55644bdd503..217f27086c5 100644 --- a/.github/workflows/create_release.yml +++ b/.github/workflows/create_release.yml @@ -35,10 +35,142 @@ jobs: with: token: ${{secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN}} fetch-depth: 0 - - name: Call Release Action - uses: ./.github/actions/release + - name: Prepare Release Info + shell: bash + run: | + python3 ./tests/ci/create_release.py --prepare-release-info \ + --ref ${{ inputs.ref }} --release-type ${{ inputs.type }} ${{ inputs.dry-run == true && '--dry-run' || '' }} + echo "::group::Release Info" + python3 -m json.tool /tmp/release_info.json + echo "::endgroup::" + release_tag=$(jq -r '.release_tag' /tmp/release_info.json) + commit_sha=$(jq -r '.commit_sha' /tmp/release_info.json) + echo "Release Tag: $release_tag" + echo "RELEASE_TAG=$release_tag" >> "$GITHUB_ENV" + echo "COMMIT_SHA=$commit_sha" >> "$GITHUB_ENV" + - name: Download All Release Artifacts + if: ${{ inputs.type == 'patch' }} + shell: bash + run: | + python3 ./tests/ci/create_release.py --download-packages ${{ inputs.dry-run == true && '--dry-run' || '' }} + - name: Push Git Tag for the Release + shell: bash + run: | + python3 ./tests/ci/create_release.py --push-release-tag ${{ inputs.dry-run == true && '--dry-run' || '' }} + - name: Push New Release Branch + if: ${{ inputs.type == 'new' }} + shell: bash + run: | + python3 ./tests/ci/create_release.py --push-new-release-branch ${{ inputs.dry-run == true && '--dry-run' || '' }} + - name: Bump CH Version and Update Contributors' List + shell: bash + run: | + python3 ./tests/ci/create_release.py --create-bump-version-pr ${{ inputs.dry-run == true && '--dry-run' || '' }} + - name: Bump Docker versions, Changelog, Security + if: ${{ inputs.type == 'patch' }} + shell: bash + run: | + git checkout master + python3 ./tests/ci/create_release.py --set-progress-started --progress "update changelog, docker version, security" + echo "List versions" + ./utils/list-versions/list-versions.sh > ./utils/list-versions/version_date.tsv + echo "Update docker version" + ./utils/list-versions/update-docker-version.sh + echo "Generate ChangeLog" + export CI=1 + 
docker run -u "${UID}:${GID}" -e PYTHONUNBUFFERED=1 -e CI=1 --network=host \ + --volume=".:/ClickHouse" clickhouse/style-test \ + /ClickHouse/tests/ci/changelog.py -v --debug-helpers \ + --gh-user-or-token=${{secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN}} --jobs=5 \ + --output="/ClickHouse/docs/changelogs/${{ env.RELEASE_TAG }}.md" ${{ env.RELEASE_TAG }} + git add ./docs/changelogs/${{ env.RELEASE_TAG }}.md + echo "Generate Security" + python3 ./utils/security-generator/generate_security.py > SECURITY.md + git diff HEAD + - name: Create ChangeLog PR + if: ${{ inputs.type == 'patch' && ! inputs.dry-run }} + uses: peter-evans/create-pull-request@v6 with: - ref: ${{ inputs.ref }} - type: ${{ inputs.type }} - dry-run: ${{ inputs.dry-run }} - token: ${{secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN}} + author: "robot-clickhouse " + token: ${{ secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN }} + committer: "robot-clickhouse " + commit-message: Update version_date.tsv and changelogs after ${{ env.RELEASE_TAG }} + branch: auto/${{ env.RELEASE_TAG }} + assignees: ${{ github.event.sender.login }} # assign the PR to the tag pusher + delete-branch: true + title: Update version_date.tsv and changelog after ${{ env.RELEASE_TAG }} + labels: do not test + body: | + Update version_date.tsv and changelogs after ${{ env.RELEASE_TAG }} + ### Changelog category (leave one): + - Not for changelog (changelog entry is not required) + - name: Complete previous steps and Restore git state + if: ${{ inputs.type == 'patch' }} + shell: bash + run: | + python3 ./tests/ci/create_release.py --set-progress-completed + git reset --hard HEAD + git checkout "$GITHUB_REF_NAME" + - name: Create GH Release + shell: bash + if: ${{ inputs.type == 'patch' }} + run: | + python3 ./tests/ci/create_release.py --create-gh-release ${{ inputs.dry-run == true && '--dry-run' || '' }} + - name: Export TGZ Packages + if: ${{ inputs.type == 'patch' }} + shell: bash + run: | + python3 ./tests/ci/artifactory.py --export-tgz ${{ inputs.dry-run == true && '--dry-run' || '' }} + - name: Test TGZ Packages + if: ${{ inputs.type == 'patch' }} + shell: bash + run: | + python3 ./tests/ci/artifactory.py --test-tgz ${{ inputs.dry-run == true && '--dry-run' || '' }} + - name: Export RPM Packages + if: ${{ inputs.type == 'patch' }} + shell: bash + run: | + python3 ./tests/ci/artifactory.py --export-rpm ${{ inputs.dry-run == true && '--dry-run' || '' }} + - name: Test RPM Packages + if: ${{ inputs.type == 'patch' }} + shell: bash + run: | + python3 ./tests/ci/artifactory.py --test-rpm ${{ inputs.dry-run == true && '--dry-run' || '' }} + - name: Export Debian Packages + if: ${{ inputs.type == 'patch' }} + shell: bash + run: | + python3 ./tests/ci/artifactory.py --export-debian ${{ inputs.dry-run == true && '--dry-run' || '' }} + - name: Test Debian Packages + if: ${{ inputs.type == 'patch' }} + shell: bash + run: | + python3 ./tests/ci/artifactory.py --test-debian ${{ inputs.dry-run == true && '--dry-run' || '' }} + - name: Docker clickhouse/clickhouse-server building + if: ${{ inputs.type == 'patch' }} + shell: bash + run: | + cd "./tests/ci" + python3 ./create_release.py --set-progress-started --progress "docker server release" + export CHECK_NAME="Docker server image" + python3 docker_server.py --release-type auto --version ${{ env.RELEASE_TAG }} --check-name "$CHECK_NAME" --sha ${{ env.COMMIT_SHA }} ${{ ! 
inputs.dry-run && '--push' || '' }} + python3 ./create_release.py --set-progress-completed + - name: Docker clickhouse/clickhouse-keeper building + if: ${{ inputs.type == 'patch' }} + shell: bash + run: | + cd "./tests/ci" + python3 ./create_release.py --set-progress-started --progress "docker keeper release" + export CHECK_NAME="Docker keeper image" + python3 docker_server.py --release-type auto --version ${{ env.RELEASE_TAG }} --check-name "$CHECK_NAME" --sha ${{ env.COMMIT_SHA }} ${{ ! inputs.dry-run && '--push' || '' }} + python3 ./create_release.py --set-progress-completed + - name: Set current Release progress to Completed with OK + shell: bash + run: | + python3 ./tests/ci/create_release.py --set-progress-started --progress "completed" + python3 ./tests/ci/create_release.py --set-progress-completed + - name: Post Slack Message + if: ${{ !cancelled() }} + shell: bash + run: | + python3 ./tests/ci/create_release.py --post-status ${{ inputs.dry-run == true && '--dry-run' || '' }} diff --git a/tests/ci/create_release.py b/tests/ci/create_release.py index a0b4083b673..b02a0bb8ed5 100755 --- a/tests/ci/create_release.py +++ b/tests/ci/create_release.py @@ -315,7 +315,7 @@ class ReleaseInfo: cmd_push_branch = f"{GIT_PREFIX} push --set-upstream origin {branch_upd_version_contributors}" body_file = get_abs_path(".github/PULL_REQUEST_TEMPLATE.md") actor = os.getenv("GITHUB_ACTOR", "") or "me" - cmd_create_pr = f"gh pr create --repo {GITHUB_REPOSITORY} --title 'Update version after release' --head {branch_upd_version_contributors} --base {self.release_branch} --body-file '{body_file} --label 'do not test' --assignee @{actor}" + cmd_create_pr = f"gh pr create --repo {GITHUB_REPOSITORY} --title 'Update version after release' --head {branch_upd_version_contributors} --base {self.release_branch} --body-file {body_file} --label 'do not test' --assignee {actor}" Shell.run(cmd_commit_version_upd, check=True, dry_run=dry_run) Shell.run(cmd_push_branch, check=True, dry_run=dry_run) Shell.run(cmd_create_pr, check=True, dry_run=dry_run) From c534cd5bc21b59788750bdfcfb4177ebba0afc85 Mon Sep 17 00:00:00 2001 From: Max K Date: Wed, 31 Jul 2024 16:22:43 +0200 Subject: [PATCH 556/661] changelog.py to retrieve best token s3fs fix changelog.py to use base branch to filter prs --- .github/workflows/create_release.yml | 12 ++++++------ tests/ci/artifactory.py | 4 ++++ tests/ci/changelog.py | 23 +++++++++++++++++++++++ 3 files changed, 33 insertions(+), 6 deletions(-) diff --git a/.github/workflows/create_release.yml b/.github/workflows/create_release.yml index 217f27086c5..5e34f50fab5 100644 --- a/.github/workflows/create_release.yml +++ b/.github/workflows/create_release.yml @@ -81,7 +81,7 @@ jobs: docker run -u "${UID}:${GID}" -e PYTHONUNBUFFERED=1 -e CI=1 --network=host \ --volume=".:/ClickHouse" clickhouse/style-test \ /ClickHouse/tests/ci/changelog.py -v --debug-helpers \ - --gh-user-or-token=${{secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN}} --jobs=5 \ + --jobs=5 \ --output="/ClickHouse/docs/changelogs/${{ env.RELEASE_TAG }}.md" ${{ env.RELEASE_TAG }} git add ./docs/changelogs/${{ env.RELEASE_TAG }}.md echo "Generate Security" @@ -111,11 +111,11 @@ jobs: python3 ./tests/ci/create_release.py --set-progress-completed git reset --hard HEAD git checkout "$GITHUB_REF_NAME" - - name: Create GH Release - shell: bash - if: ${{ inputs.type == 'patch' }} - run: | - python3 ./tests/ci/create_release.py --create-gh-release ${{ inputs.dry-run == true && '--dry-run' || '' }} +# - name: Create GH Release +# shell: bash +# if: ${{ 
inputs.type == 'patch' }} +# run: | +# python3 ./tests/ci/create_release.py --create-gh-release ${{ inputs.dry-run == true && '--dry-run' || '' }} - name: Export TGZ Packages if: ${{ inputs.type == 'patch' }} shell: bash diff --git a/tests/ci/artifactory.py b/tests/ci/artifactory.py index 86dcaf79854..4ee326593e6 100644 --- a/tests/ci/artifactory.py +++ b/tests/ci/artifactory.py @@ -52,6 +52,10 @@ class R2MountPoint: if self.CACHE_ENABLED else "" ) + if not dry_run: + self.aux_mount_options += ( + "-o passwd_file /home/ubuntu/.passwd-s3fs_packages " + ) # without -o nomultipart there are errors like "Error 5 writing to /home/ubuntu/***.deb: Input/output error" self.mount_cmd = f"s3fs {self.bucket_name} {self.MOUNT_POINT} -o url={self.API_ENDPOINT} -o use_path_request_style -o umask=0000 -o nomultipart -o logfile={self.LOG_FILE} {self.aux_mount_options}" elif self.app == MountPointApp.RCLONE: diff --git a/tests/ci/changelog.py b/tests/ci/changelog.py index 3ba618f3ae5..e23dd8e4c67 100755 --- a/tests/ci/changelog.py +++ b/tests/ci/changelog.py @@ -19,6 +19,8 @@ from env_helper import TEMP_PATH from git_helper import git_runner, is_shallow from github_helper import GitHub, PullRequest, PullRequests, Repository from s3_helper import S3Helper +from get_robot_token import get_best_robot_token +from ci_utils import Shell from version_helper import ( FILE_WITH_VERSION_PATH, get_abs_path, @@ -171,6 +173,7 @@ def parse_args() -> argparse.Namespace: parser.add_argument( "--gh-user-or-token", help="user name or GH token to authenticate", + default=get_best_robot_token(), ) parser.add_argument( "--gh-password", @@ -397,6 +400,15 @@ def get_year(prs: PullRequests) -> int: return max(pr.created_at.year for pr in prs) +def get_branch_by_tag(tag: str) -> Optional[str]: + tag.removeprefix("v") + versions = tag.split(".") + if len(versions) < 3: + print("ERROR: Can't get branch by tag") + return None + return f"{versions[0]}.{versions[1]}" + + def main(): log_levels = [logging.WARN, logging.INFO, logging.DEBUG] args = parse_args() @@ -446,6 +458,17 @@ def main(): gh_cache = GitHubCache(gh.cache_path, temp_path, S3Helper()) gh_cache.download() query = f"type:pr repo:{args.repo} is:merged" + branch = get_branch_by_tag(TO_REF) + if branch and Shell.check(f"git show-ref --quiet {branch}"): + try: + if int(branch.split(".")[-1]) > 1: + query += f" base:{branch}" + print(f"NOTE: will use base branch to filter PRs {branch}") + except ValueError: + print(f"ERROR: cannot get minor version from branch {branch} - pass") + pass + else: + print(f"ERROR: invalid branch {branch} - pass") prs = gh.get_pulls_from_search( query=query, merged=merged, sort="created", progress_func=tqdm.tqdm ) From 8214910cc7bd84f613631c2fada9682820df8003 Mon Sep 17 00:00:00 2001 From: Max K Date: Wed, 31 Jul 2024 20:14:22 +0200 Subject: [PATCH 557/661] add geesfs --- .github/workflows/create_release.yml | 10 +++---- .github/workflows/release.yml | 4 +-- tests/ci/artifactory.py | 35 +++++++++++++++++++++--- tests/ci/changelog.py | 41 ++++++++++++++++++---------- tests/ci/ci_utils.py | 11 ++++---- 5 files changed, 69 insertions(+), 32 deletions(-) diff --git a/.github/workflows/create_release.yml b/.github/workflows/create_release.yml index 5e34f50fab5..c3126abe461 100644 --- a/.github/workflows/create_release.yml +++ b/.github/workflows/create_release.yml @@ -62,15 +62,14 @@ jobs: shell: bash run: | python3 ./tests/ci/create_release.py --push-new-release-branch ${{ inputs.dry-run == true && '--dry-run' || '' }} - - name: Bump CH Version and 
Update Contributors' List - shell: bash - run: | - python3 ./tests/ci/create_release.py --create-bump-version-pr ${{ inputs.dry-run == true && '--dry-run' || '' }} +# - name: Bump CH Version and Update Contributors' List +# shell: bash +# run: | +# python3 ./tests/ci/create_release.py --create-bump-version-pr ${{ inputs.dry-run == true && '--dry-run' || '' }} - name: Bump Docker versions, Changelog, Security if: ${{ inputs.type == 'patch' }} shell: bash run: | - git checkout master python3 ./tests/ci/create_release.py --set-progress-started --progress "update changelog, docker version, security" echo "List versions" ./utils/list-versions/list-versions.sh > ./utils/list-versions/version_date.tsv @@ -96,6 +95,7 @@ jobs: committer: "robot-clickhouse " commit-message: Update version_date.tsv and changelogs after ${{ env.RELEASE_TAG }} branch: auto/${{ env.RELEASE_TAG }} + base: master assignees: ${{ github.event.sender.login }} # assign the PR to the tag pusher delete-branch: true title: Update version_date.tsv and changelog after ${{ env.RELEASE_TAG }} diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 3bd6dfae6ca..8620d15ec19 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -55,12 +55,12 @@ jobs: run: | cd "$GITHUB_WORKSPACE/tests/ci" export CHECK_NAME="Docker server image" - python3 docker_server.py --release-type auto --version "$GITHUB_TAG" --check-name "$CHECK_NAME" --push + python3 docker_server.py --release-type auto --version "$GITHUB_TAG" --sha "$(git rev-list -n 1 $GITHUB_TAG)" --check-name "$CHECK_NAME" --push - name: Check docker clickhouse/clickhouse-keeper building run: | cd "$GITHUB_WORKSPACE/tests/ci" export CHECK_NAME="Docker keeper image" - python3 docker_server.py --release-type auto --version "$GITHUB_TAG" --check-name "$CHECK_NAME" --push + python3 docker_server.py --release-type auto --version "$GITHUB_TAG" --sha "$(git rev-list -n 1 $GITHUB_TAG)" --check-name "$CHECK_NAME" --push - name: Cleanup if: always() run: | diff --git a/tests/ci/artifactory.py b/tests/ci/artifactory.py index 4ee326593e6..a508374f856 100644 --- a/tests/ci/artifactory.py +++ b/tests/ci/artifactory.py @@ -15,6 +15,7 @@ from ci_utils import WithIter, Shell class MountPointApp(metaclass=WithIter): RCLONE = "rclone" S3FS = "s3fs" + GEESEFS = "geesefs" class R2MountPoint: @@ -70,6 +71,20 @@ class R2MountPoint: ) # Use --no-modtime to try to avoid: ERROR : rpm/lts/clickhouse-client-24.3.6.5.x86_64.rpm: Failed to apply pending mod time self.mount_cmd = f"rclone mount remote:{self.bucket_name} {self.MOUNT_POINT} --daemon --cache-dir {self.cache_dir} --umask 0000 --log-file {self.LOG_FILE} {self.aux_mount_options}" + elif self.app == MountPointApp.GEESEFS: + self.cache_dir = "/home/ubuntu/geesefs_cache" + self.aux_mount_options += ( + f" --cache={self.cache_dir} " if self.CACHE_ENABLED else "" + ) + if not dry_run: + self.aux_mount_options += f" --shared-config=/home/ubuntu/.r2_auth " + else: + self.aux_mount_options += ( + f" --shared-config=/home/ubuntu/.r2_auth_test " + ) + if self.DEBUG: + self.aux_mount_options += " --debug_s3 --debug_fuse " + self.mount_cmd = f"geesefs --endpoint={self.API_ENDPOINT} --cheap --memory-limit=2050 --gc-interval=100 --max-flushers=5 --max-parallel-parts=1 --max-parallel-copy=2 --log-file={self.LOG_FILE} {self.aux_mount_options} {self.bucket_name} {self.MOUNT_POINT}" else: assert False @@ -87,7 +102,7 @@ class R2MountPoint: Shell.run(_UNMOUNT_CMD) Shell.run(_MKDIR_CMD) Shell.run(_MKDIR_FOR_CACHE) - if self.app 
== MountPointApp.S3FS: + if self.app != MountPointApp.RCLONE: Shell.run(self.mount_cmd, check=True) else: # didn't manage to use simple run() and without blocking or failure @@ -158,7 +173,13 @@ class DebianArtifactory: cmd = f'docker run --rm ubuntu:latest bash -c "apt update -y; apt install -y sudo gnupg ca-certificates; apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv 8919F6BD2B48D754; {debian_command}"' print("Running test command:") print(f" {cmd}") - Shell.run(cmd, check=True) + assert Shell.check(cmd) + print(f"Test packages installation, version [latest]") + debian_command_2 = f"echo 'deb {self.repo_url} stable main' | tee /etc/apt/sources.list.d/clickhouse.list; apt update -y; apt-get install -y clickhouse-common-static clickhouse-client" + cmd = f'docker run --rm ubuntu:latest bash -c "apt update -y; apt install -y sudo gnupg ca-certificates; apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv 8919F6BD2B48D754; {debian_command_2}"' + print("Running test command:") + print(f" {cmd}") + assert Shell.check(cmd) self.release_info.debian_command = debian_command self.release_info.dump() @@ -234,7 +255,13 @@ class RpmArtifactory: cmd = f'docker run --rm fedora:latest /bin/bash -c "dnf -y install dnf-plugins-core && dnf config-manager --add-repo={self.repo_url} && {rpm_command}"' print("Running test command:") print(f" {cmd}") - Shell.run(cmd, check=True) + assert Shell.check(cmd) + print(f"Test package installation, version [latest]") + rpm_command_2 = f"dnf config-manager --add-repo={self.repo_url} && dnf makecache && dnf -y install clickhouse-client" + cmd = f'docker run --rm fedora:latest /bin/bash -c "dnf -y install dnf-plugins-core && dnf config-manager --add-repo={self.repo_url} && {rpm_command_2}"' + print("Running test command:") + print(f" {cmd}") + assert Shell.check(cmd) self.release_info.rpm_command = rpm_command self.release_info.dump() @@ -350,7 +377,7 @@ if __name__ == "__main__": ERROR : IO error: NotImplemented: versionId not implemented Failed to copy: NotImplemented: versionId not implemented """ - mp = R2MountPoint(MountPointApp.S3FS, dry_run=args.dry_run) + mp = R2MountPoint(MountPointApp.GEESEFS, dry_run=args.dry_run) if args.export_debian: with ReleaseContextManager( release_progress=ReleaseProgress.EXPORT_DEB diff --git a/tests/ci/changelog.py b/tests/ci/changelog.py index e23dd8e4c67..929f0f3523a 100755 --- a/tests/ci/changelog.py +++ b/tests/ci/changelog.py @@ -7,7 +7,7 @@ import re from datetime import date, timedelta from pathlib import Path from subprocess import DEVNULL -from typing import Any, Dict, List, Optional, TextIO +from typing import Any, Dict, List, Optional, TextIO, Tuple import tqdm # type: ignore from github.GithubException import RateLimitExceededException, UnknownObjectException @@ -400,13 +400,19 @@ def get_year(prs: PullRequests) -> int: return max(pr.created_at.year for pr in prs) -def get_branch_by_tag(tag: str) -> Optional[str]: - tag.removeprefix("v") +def get_branch_and_patch_by_tag(tag: str) -> Tuple[Optional[str], Optional[int]]: + tag = tag.removeprefix("v") versions = tag.split(".") - if len(versions) < 3: + if len(versions) < 4: print("ERROR: Can't get branch by tag") - return None - return f"{versions[0]}.{versions[1]}" + return None, None + try: + patch_version = int(versions[2]) + branch = f"{int(versions[0])}.{int(versions[1])}" + print(f"Branch [{branch}], patch version [{patch_version}]") + except ValueError: + return None, None + return branch, patch_version def main(): @@ -458,17 +464,22 @@ def 
main(): gh_cache = GitHubCache(gh.cache_path, temp_path, S3Helper()) gh_cache.download() query = f"type:pr repo:{args.repo} is:merged" - branch = get_branch_by_tag(TO_REF) - if branch and Shell.check(f"git show-ref --quiet {branch}"): - try: - if int(branch.split(".")[-1]) > 1: - query += f" base:{branch}" - print(f"NOTE: will use base branch to filter PRs {branch}") - except ValueError: - print(f"ERROR: cannot get minor version from branch {branch} - pass") - pass + + branch, patch = get_branch_and_patch_by_tag(TO_REF) + if branch and patch and Shell.check(f"git show-ref --quiet {branch}"): + if patch > 1: + query += f" base:{branch}" + print( + f"NOTE: It's a patch [{patch}]. will use base branch to filter PRs [{branch}]" + ) + else: + print( + f"NOTE: It's a first patch version. should count PRs merged on master - won't filter PRs by branch" + ) else: print(f"ERROR: invalid branch {branch} - pass") + + print(f"Fetch PRs with query {query}") prs = gh.get_pulls_from_search( query=query, merged=merged, sort="created", progress_func=tqdm.tqdm ) diff --git a/tests/ci/ci_utils.py b/tests/ci/ci_utils.py index 447aac74c7f..3182c0bc5d8 100644 --- a/tests/ci/ci_utils.py +++ b/tests/ci/ci_utils.py @@ -246,15 +246,14 @@ class Shell: @classmethod def check(cls, command): - result = subprocess.run( + proc = subprocess.Popen( command, shell=True, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - text=True, - check=False, + stdout=subprocess.STDOUT, + stderr=subprocess.STDOUT, ) - return result.returncode == 0 + proc.wait() + return proc.returncode == 0 class Utils: From 4802ea540a4691c13386b74416352155b93f713d Mon Sep 17 00:00:00 2001 From: Max K Date: Thu, 1 Aug 2024 11:57:54 +0200 Subject: [PATCH 558/661] improve ci_utils' Shell --- .github/workflows/pull_request.yml | 9 +++- .github/workflows/release.yml | 6 ++- pyproject.toml | 1 + tests/ci/artifactory.py | 84 +++++++++++------------------ tests/ci/auto_release.py | 5 +- tests/ci/ci_buddy.py | 6 ++- tests/ci/ci_definitions.py | 3 +- tests/ci/ci_utils.py | 72 ++++++++++++------------- tests/ci/create_release.py | 86 +++++++++++++++++------------- tests/ci/docker_images_helper.py | 8 +-- 10 files changed, 141 insertions(+), 139 deletions(-) diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index 854dff530e7..04bef1460a6 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -142,8 +142,13 @@ jobs: # Reports should run even if Builds_1/2 fail - run them separately (not in Tests_1/2/3) Builds_Report: # run report check for failed builds to indicate the CI error - if: ${{ !cancelled() && needs.RunConfig.result == 'success' && contains(fromJson(needs.RunConfig.outputs.data).jobs_data.jobs_to_do, 'Builds') }} - needs: [RunConfig, StyleCheck, Builds_1, Builds_2] + if: ${{ !cancelled() + && needs.RunConfig.result == 'success' + && needs.StyleCheck.result != 'failure' + && needs.FastTest.result != 'failure' + && needs.BuildDockers.result != 'failure' + && contains(fromJson(needs.RunConfig.outputs.data).jobs_data.jobs_to_do, 'Builds') }} + needs: [RunConfig, BuildDockers, StyleCheck, FastTest, Builds_1, Builds_2] uses: ./.github/workflows/reusable_test.yml with: test_name: Builds diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 8620d15ec19..7dc4e3298a6 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -55,12 +55,14 @@ jobs: run: | cd "$GITHUB_WORKSPACE/tests/ci" export CHECK_NAME="Docker server image" - python3 
docker_server.py --release-type auto --version "$GITHUB_TAG" --sha "$(git rev-list -n 1 $GITHUB_TAG)" --check-name "$CHECK_NAME" --push + SHA=$(git rev-list -n 1 "$GITHUB_TAG") + python3 docker_server.py --release-type auto --version "$GITHUB_TAG" --sha "$SHA" --check-name "$CHECK_NAME" --push - name: Check docker clickhouse/clickhouse-keeper building run: | cd "$GITHUB_WORKSPACE/tests/ci" export CHECK_NAME="Docker keeper image" - python3 docker_server.py --release-type auto --version "$GITHUB_TAG" --sha "$(git rev-list -n 1 $GITHUB_TAG)" --check-name "$CHECK_NAME" --push + SHA=$(git rev-list -n 1 "$GITHUB_TAG") + python3 docker_server.py --release-type auto --version "$GITHUB_TAG" --sha "$SHA" --check-name "$CHECK_NAME" --push - name: Cleanup if: always() run: | diff --git a/pyproject.toml b/pyproject.toml index c89d46c0929..9bbeac3ddae 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -39,6 +39,7 @@ disable = ''' no-else-return, global-statement, f-string-without-interpolation, + consider-using-with, ''' [tool.pylint.SIMILARITIES] diff --git a/tests/ci/artifactory.py b/tests/ci/artifactory.py index a508374f856..71deaccf917 100644 --- a/tests/ci/artifactory.py +++ b/tests/ci/artifactory.py @@ -13,7 +13,6 @@ from ci_utils import WithIter, Shell class MountPointApp(metaclass=WithIter): - RCLONE = "rclone" S3FS = "s3fs" GEESEFS = "geesefs" @@ -31,9 +30,6 @@ class R2MountPoint: DEBUG = True # enable cache for mountpoint CACHE_ENABLED = False - # TODO: which mode is better: minimal/writes/full/off - _RCLONE_CACHE_MODE = "minimal" - UMASK = "0000" def __init__(self, app: str, dry_run: bool) -> None: assert app in MountPointApp @@ -59,18 +55,6 @@ class R2MountPoint: ) # without -o nomultipart there are errors like "Error 5 writing to /home/ubuntu/***.deb: Input/output error" self.mount_cmd = f"s3fs {self.bucket_name} {self.MOUNT_POINT} -o url={self.API_ENDPOINT} -o use_path_request_style -o umask=0000 -o nomultipart -o logfile={self.LOG_FILE} {self.aux_mount_options}" - elif self.app == MountPointApp.RCLONE: - # run rclone mount process asynchronously, otherwise subprocess.run(daemonized command) will not return - self.cache_dir = "/home/ubuntu/rclone_cache" - self.aux_mount_options += "--no-modtime " if self.NOMODTIME else "" - self.aux_mount_options += "-v " if self.DEBUG else "" # -vv too verbose - self.aux_mount_options += ( - f"--vfs-cache-mode {self._RCLONE_CACHE_MODE} --vfs-cache-max-size {self._CACHE_MAX_SIZE_GB}G" - if self.CACHE_ENABLED - else "--vfs-cache-mode off" - ) - # Use --no-modtime to try to avoid: ERROR : rpm/lts/clickhouse-client-24.3.6.5.x86_64.rpm: Failed to apply pending mod time - self.mount_cmd = f"rclone mount remote:{self.bucket_name} {self.MOUNT_POINT} --daemon --cache-dir {self.cache_dir} --umask 0000 --log-file {self.LOG_FILE} {self.aux_mount_options}" elif self.app == MountPointApp.GEESEFS: self.cache_dir = "/home/ubuntu/geesefs_cache" self.aux_mount_options += ( @@ -98,22 +82,17 @@ class R2MountPoint: ) _TEST_MOUNT_CMD = f"mount | grep -q {self.MOUNT_POINT}" - Shell.run(_CLEAN_LOG_FILE_CMD) - Shell.run(_UNMOUNT_CMD) - Shell.run(_MKDIR_CMD) - Shell.run(_MKDIR_FOR_CACHE) - if self.app != MountPointApp.RCLONE: - Shell.run(self.mount_cmd, check=True) - else: - # didn't manage to use simple run() and without blocking or failure - Shell.run_as_daemon(self.mount_cmd) + Shell.check(_CLEAN_LOG_FILE_CMD, verbose=True) + Shell.check(_UNMOUNT_CMD, verbose=True) + Shell.check(_MKDIR_CMD, verbose=True) + Shell.check(_MKDIR_FOR_CACHE, verbose=True) + 
Shell.check(self.mount_cmd, strict=True, verbose=True) time.sleep(3) - Shell.run(_TEST_MOUNT_CMD, check=True) + Shell.check(_TEST_MOUNT_CMD, strict=True, verbose=True) @classmethod def teardown(cls): - print(f"Unmount [{cls.MOUNT_POINT}]") - Shell.run(f"umount {cls.MOUNT_POINT}") + Shell.check(f"umount {cls.MOUNT_POINT}", verbose=True) class RepoCodenames(metaclass=WithIter): @@ -148,10 +127,9 @@ class DebianArtifactory: ] REPREPRO_CMD_PREFIX = f"reprepro --basedir {R2MountPoint.MOUNT_POINT}/configs/deb --outdir {R2MountPoint.MOUNT_POINT}/deb --verbose" cmd = f"{REPREPRO_CMD_PREFIX} includedeb {self.codename} {' '.join(paths)}" - print("Running export command:") - print(f" {cmd}") - Shell.run(cmd, check=True) - Shell.run("sync") + print("Running export commands:") + Shell.check(cmd, strict=True, verbose=True) + Shell.check("sync") if self.codename == RepoCodenames.LTS: packages_with_version = [ @@ -163,11 +141,11 @@ class DebianArtifactory: cmd = f"{REPREPRO_CMD_PREFIX} copy {RepoCodenames.STABLE} {RepoCodenames.LTS} {' '.join(packages_with_version)}" print("Running copy command:") print(f" {cmd}") - Shell.run(cmd, check=True) - Shell.run("sync") + Shell.check(cmd, strict=True) + Shell.check("sync") def test_packages(self): - Shell.run("docker pull ubuntu:latest") + Shell.check("docker pull ubuntu:latest", strict=True) print(f"Test packages installation, version [{self.version}]") debian_command = f"echo 'deb {self.repo_url} stable main' | tee /etc/apt/sources.list.d/clickhouse.list; apt update -y; apt-get install -y clickhouse-common-static={self.version} clickhouse-client={self.version}" cmd = f'docker run --rm ubuntu:latest bash -c "apt update -y; apt install -y sudo gnupg ca-certificates; apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv 8919F6BD2B48D754; {debian_command}"' @@ -236,20 +214,18 @@ class RpmArtifactory: print(f"Exporting RPM packages into [{codename}]") for command in commands: - print("Running command:") - print(f" {command}") - Shell.run(command, check=True) + Shell.check(command, strict=True, verbose=True) update_public_key = f"gpg --armor --export {self._SIGN_KEY}" pub_key_path = dest_dir / "repodata" / "repomd.xml.key" print("Updating repomd.xml.key") - pub_key_path.write_text(Shell.run(update_public_key, check=True)) + pub_key_path.write_text(Shell.get_output_or_raise(update_public_key)) if codename == RepoCodenames.LTS: self.export_packages(RepoCodenames.STABLE) - Shell.run("sync") + Shell.check("sync") def test_packages(self): - Shell.run("docker pull fedora:latest") + Shell.check("docker pull fedora:latest", strict=True) print(f"Test package installation, version [{self.version}]") rpm_command = f"dnf config-manager --add-repo={self.repo_url} && dnf makecache && dnf -y install clickhouse-client-{self.version}-1" cmd = f'docker run --rm fedora:latest /bin/bash -c "dnf -y install dnf-plugins-core && dnf config-manager --add-repo={self.repo_url} && {rpm_command}"' @@ -302,26 +278,30 @@ class TgzArtifactory: if codename == RepoCodenames.LTS: self.export_packages(RepoCodenames.STABLE) - Shell.run("sync") + Shell.check("sync") def test_packages(self): tgz_file = "/tmp/tmp.tgz" tgz_sha_file = "/tmp/tmp.tgz.sha512" cmd = f"curl -o {tgz_file} -f0 {self.repo_url}/stable/clickhouse-client-{self.version}-arm64.tgz" - Shell.run( + Shell.check( cmd, - check=True, + strict=True, + verbose=True, ) - Shell.run( + Shell.check( f"curl -o {tgz_sha_file} -f0 {self.repo_url}/stable/clickhouse-client-{self.version}-arm64.tgz.sha512", - check=True, + strict=True, + 
verbose=True, + ) + expected_checksum = Shell.get_output_or_raise(f"cut -d ' ' -f 1 {tgz_sha_file}") + actual_checksum = Shell.get_output_or_raise( + f"sha512sum {tgz_file} | cut -d ' ' -f 1" ) - expected_checksum = Shell.run(f"cut -d ' ' -f 1 {tgz_sha_file}", check=True) - actual_checksum = Shell.run(f"sha512sum {tgz_file} | cut -d ' ' -f 1") assert ( expected_checksum == actual_checksum ), f"[{actual_checksum} != {expected_checksum}]" - Shell.run("rm /tmp/tmp.tgz*") + Shell.check("rm /tmp/tmp.tgz*", verbose=True) self.release_info.tgz_command = cmd self.release_info.dump() @@ -373,9 +353,9 @@ if __name__ == "__main__": args = parse_args() """ - Use S3FS. RCLONE has some errors with r2 remote which I didn't figure out how to resolve: - ERROR : IO error: NotImplemented: versionId not implemented - Failed to copy: NotImplemented: versionId not implemented + S3FS - very slow with a big repo + RCLONE - fuse had many different errors with r2 remote and completely removed + GEESEFS ? """ mp = R2MountPoint(MountPointApp.GEESEFS, dry_run=args.dry_run) if args.export_debian: diff --git a/tests/ci/auto_release.py b/tests/ci/auto_release.py index f2386fe207f..6c17b4c74ad 100644 --- a/tests/ci/auto_release.py +++ b/tests/ci/auto_release.py @@ -85,7 +85,7 @@ class AutoReleaseInfo: def _prepare(token): assert len(token) > 10 os.environ["GH_TOKEN"] = token - Shell.run("gh auth status", check=True) + Shell.check("gh auth status") gh = GitHub(token) prs = gh.get_release_pulls(GITHUB_REPOSITORY) @@ -106,9 +106,8 @@ def _prepare(token): latest_release_tag_ref = refs[-1] latest_release_tag = repo.get_git_tag(latest_release_tag_ref.object.sha) - commits = Shell.run( + commits = Shell.get_output_or_raise( f"git rev-list --first-parent {latest_release_tag.tag}..origin/{pr.head.ref}", - check=True, ).split("\n") commit_num = len(commits) print( diff --git a/tests/ci/ci_buddy.py b/tests/ci/ci_buddy.py index dfb5885270a..138909c1db0 100644 --- a/tests/ci/ci_buddy.py +++ b/tests/ci/ci_buddy.py @@ -120,8 +120,10 @@ class CIBuddy: ) -> None: instance_id, instance_type = "unknown", "unknown" if with_instance_info: - instance_id = Shell.run("ec2metadata --instance-id") or instance_id - instance_type = Shell.run("ec2metadata --instance-type") or instance_type + instance_id = Shell.get_output("ec2metadata --instance-id") or instance_id + instance_type = ( + Shell.get_output("ec2metadata --instance-type") or instance_type + ) if not job_name: job_name = os.getenv("CHECK_NAME", "unknown") sign = ":red_circle:" if not critical else ":black_circle:" diff --git a/tests/ci/ci_definitions.py b/tests/ci/ci_definitions.py index 054b554b8fa..51de8c63509 100644 --- a/tests/ci/ci_definitions.py +++ b/tests/ci/ci_definitions.py @@ -554,7 +554,7 @@ class CommonJobConfigs: run_command="sqllogic_test.py", timeout=10800, release_only=True, - runner_type=Runners.STYLE_CHECKER, + runner_type=Runners.FUNC_TESTER, ) SQL_TEST = JobConfig( job_name_keyword="sqltest", @@ -582,6 +582,7 @@ class CommonJobConfigs: digest=DigestConfig( include_paths=[ "tests/ci/docker_server.py", + "tests/ci/docker_images_helper.py", "./docker/server", ] ), diff --git a/tests/ci/ci_utils.py b/tests/ci/ci_utils.py index 3182c0bc5d8..cd21554788c 100644 --- a/tests/ci/ci_utils.py +++ b/tests/ci/ci_utils.py @@ -2,6 +2,7 @@ import json import os import re import subprocess +import sys import time from contextlib import contextmanager from pathlib import Path @@ -192,7 +193,7 @@ class GHActions: get_url_cmd = ( f"gh pr list --repo {repo} --head {branch} --json url --jq 
'.[0].url'" ) - url = Shell.run(get_url_cmd) + url = Shell.get_output(get_url_cmd) if not url: print(f"ERROR: PR nor found, branch [{branch}]") return url @@ -200,59 +201,56 @@ class GHActions: class Shell: @classmethod - def run_strict(cls, command): + def get_output_or_raise(cls, command): + return cls.get_output(command, strict=True) + + @classmethod + def get_output(cls, command, strict=False): res = subprocess.run( command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, - check=True, + check=strict, ) return res.stdout.strip() @classmethod - def run(cls, command, check=False, dry_run=False, **kwargs): + def check( + cls, + command, + strict=False, + verbose=False, + dry_run=False, + stdin_str=None, + **kwargs, + ): if dry_run: print(f"Dry-ryn. Would run command [{command}]") - return "" - print(f"Run command [{command}]") - res = "" - result = subprocess.run( - command, - shell=True, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - text=True, - check=False, - **kwargs, - ) - if result.returncode == 0: - print(f"stdout: {result.stdout.strip()}") - res = result.stdout - else: - print( - f"ERROR: stdout: {result.stdout.strip()}, stderr: {result.stderr.strip()}" - ) - if check: - assert result.returncode == 0 - return res.strip() - - @classmethod - def run_as_daemon(cls, command): - print(f"Run daemon command [{command}]") - subprocess.Popen(command.split(" ")) # pylint:disable=consider-using-with - return 0, "" - - @classmethod - def check(cls, command): + return 0 + if verbose: + print(f"Run command [{command}]") proc = subprocess.Popen( command, shell=True, - stdout=subprocess.STDOUT, stderr=subprocess.STDOUT, + stdout=subprocess.PIPE, + stdin=subprocess.PIPE if stdin_str else None, + universal_newlines=True, + start_new_session=True, + bufsize=1, + errors="backslashreplace", + **kwargs, ) + if stdin_str: + proc.communicate(input=stdin_str) + elif proc.stdout: + for line in proc.stdout: + sys.stdout.write(line) proc.wait() + if strict: + assert proc.returncode == 0 return proc.returncode == 0 @@ -277,7 +275,7 @@ class Utils: @staticmethod def clear_dmesg(): - Shell.run("sudo dmesg --clear ||:") + Shell.check("sudo dmesg --clear", verbose=True) @staticmethod def check_pr_description(pr_body: str, repo_name: str) -> Tuple[str, str]: diff --git a/tests/ci/create_release.py b/tests/ci/create_release.py index b02a0bb8ed5..0d505d6ccc7 100755 --- a/tests/ci/create_release.py +++ b/tests/ci/create_release.py @@ -137,12 +137,13 @@ class ReleaseInfo: assert release_type in ("patch", "new") if release_type == "new": # check commit_ref is right and on a right branch - Shell.run( + Shell.check( f"git merge-base --is-ancestor {commit_ref} origin/master", - check=True, + strict=True, + verbose=True, ) with checkout(commit_ref): - commit_sha = Shell.run(f"git rev-parse {commit_ref}", check=True) + commit_sha = Shell.get_output_or_raise(f"git rev-parse {commit_ref}") # Git() must be inside "with checkout" contextmanager git = Git() version = get_version_from_repo(git=git) @@ -154,13 +155,13 @@ class ReleaseInfo: ), f"BUG: latest tag [{git.latest_tag}], expected [{expected_prev_tag}]" release_tag = version.describe previous_release_tag = expected_prev_tag - previous_release_sha = Shell.run_strict( + previous_release_sha = Shell.get_output_or_raise( f"git rev-parse {previous_release_tag}" ) assert previous_release_sha if release_type == "patch": with checkout(commit_ref): - commit_sha = Shell.run(f"git rev-parse {commit_ref}", check=True) + commit_sha = 
Shell.get_output_or_raise(f"git rev-parse {commit_ref}") # Git() must be inside "with checkout" contextmanager git = Git() version = get_version_from_repo(git=git) @@ -168,11 +169,16 @@ class ReleaseInfo: version.with_description(codename) release_branch = f"{version.major}.{version.minor}" release_tag = version.describe - Shell.run(f"{GIT_PREFIX} fetch origin {release_branch} --tags", check=True) + Shell.check( + f"{GIT_PREFIX} fetch origin {release_branch} --tags", + strict=True, + verbose=True, + ) # check commit is right and on a right branch - Shell.run( + Shell.check( f"git merge-base --is-ancestor {commit_ref} origin/{release_branch}", - check=True, + strict=True, + verbose=True, ) if version.patch == 1: expected_version = copy(version) @@ -197,7 +203,7 @@ class ReleaseInfo: False ), f"BUG: Unexpected latest tag [{git.latest_tag}] expected [{expected_tag_prefix}*{expected_tag_suffix}]" - previous_release_sha = Shell.run_strict( + previous_release_sha = Shell.get_output_or_raise( f"git rev-parse {previous_release_tag}" ) assert previous_release_sha @@ -226,25 +232,26 @@ class ReleaseInfo: def push_release_tag(self, dry_run: bool) -> None: if dry_run: # remove locally created tag from prev run - Shell.run( - f"{GIT_PREFIX} tag -l | grep -q {self.release_tag} && git tag -d {self.release_tag} ||:" + Shell.check( + f"{GIT_PREFIX} tag -l | grep -q {self.release_tag} && git tag -d {self.release_tag}" ) # Create release tag print( f"Create and push release tag [{self.release_tag}], commit [{self.commit_sha}]" ) tag_message = f"Release {self.release_tag}" - Shell.run( + Shell.check( f"{GIT_PREFIX} tag -a -m '{tag_message}' {self.release_tag} {self.commit_sha}", - check=True, + strict=True, + verbose=True, ) cmd_push_tag = f"{GIT_PREFIX} push origin {self.release_tag}:{self.release_tag}" - Shell.run(cmd_push_tag, dry_run=dry_run, check=True) + Shell.check(cmd_push_tag, dry_run=dry_run, strict=True, verbose=True) @staticmethod def _create_gh_label(label: str, color_hex: str, dry_run: bool) -> None: cmd = f"gh api repos/{GITHUB_REPOSITORY}/labels -f name={label} -f color={color_hex}" - Shell.run(cmd, dry_run=dry_run, check=True) + Shell.check(cmd, dry_run=dry_run, strict=True) def push_new_release_branch(self, dry_run: bool) -> None: assert ( @@ -261,7 +268,7 @@ class ReleaseInfo: ), f"Unexpected current version in git, must precede [{self.version}] by one step, actual [{version.string}]" if dry_run: # remove locally created branch from prev run - Shell.run( + Shell.check( f"{GIT_PREFIX} branch -l | grep -q {new_release_branch} && git branch -d {new_release_branch}" ) print( @@ -275,7 +282,7 @@ class ReleaseInfo: cmd_push_branch = ( f"{GIT_PREFIX} push --set-upstream origin {new_release_branch}" ) - Shell.run(cmd_push_branch, dry_run=dry_run, check=True) + Shell.check(cmd_push_branch, dry_run=dry_run, strict=True, verbose=True) print("Create and push backport tags for new release branch") ReleaseInfo._create_gh_label( @@ -284,13 +291,14 @@ class ReleaseInfo: ReleaseInfo._create_gh_label( f"v{new_release_branch}-affected", "c2bfff", dry_run=dry_run ) - Shell.run( + Shell.check( f"""gh pr create --repo {GITHUB_REPOSITORY} --title 'Release pull request for branch {new_release_branch}' --head {new_release_branch} {pr_labels} --body 'This PullRequest is a part of ClickHouse release cycle. It is used by CI system only. Do not perform any changes with it.' 
""", dry_run=dry_run, - check=True, + strict=True, + verbose=True, ) def update_version_and_contributors_list(self, dry_run: bool) -> None: @@ -316,13 +324,19 @@ class ReleaseInfo: body_file = get_abs_path(".github/PULL_REQUEST_TEMPLATE.md") actor = os.getenv("GITHUB_ACTOR", "") or "me" cmd_create_pr = f"gh pr create --repo {GITHUB_REPOSITORY} --title 'Update version after release' --head {branch_upd_version_contributors} --base {self.release_branch} --body-file {body_file} --label 'do not test' --assignee {actor}" - Shell.run(cmd_commit_version_upd, check=True, dry_run=dry_run) - Shell.run(cmd_push_branch, check=True, dry_run=dry_run) - Shell.run(cmd_create_pr, check=True, dry_run=dry_run) + Shell.check( + cmd_commit_version_upd, strict=True, dry_run=dry_run, verbose=True + ) + Shell.check(cmd_push_branch, strict=True, dry_run=dry_run, verbose=True) + Shell.check(cmd_create_pr, strict=True, dry_run=dry_run, verbose=True) if dry_run: - Shell.run(f"{GIT_PREFIX} diff '{CMAKE_PATH}' '{CONTRIBUTORS_PATH}'") - Shell.run( - f"{GIT_PREFIX} checkout '{CMAKE_PATH}' '{CONTRIBUTORS_PATH}'" + Shell.check( + f"{GIT_PREFIX} diff '{CMAKE_PATH}' '{CONTRIBUTORS_PATH}'", + verbose=True, + ) + Shell.check( + f"{GIT_PREFIX} checkout '{CMAKE_PATH}' '{CONTRIBUTORS_PATH}'", + verbose=True, ) self.version_bump_pr = "dry-run" else: @@ -358,7 +372,7 @@ class ReleaseInfo: cmds.append(f"gh release upload {self.release_tag} {file}") if not dry_run: for cmd in cmds: - Shell.run(cmd, check=True) + Shell.check(cmd, strict=True, verbose=True) self.release_url = f"https://github.com/{GITHUB_REPOSITORY}/releases/tag/{self.release_tag}" else: print("Dry-run, would run commands:") @@ -424,7 +438,7 @@ class PackageDownloader: self.macos_package_files = ["clickhouse-macos", "clickhouse-macos-aarch64"] self.file_to_type = {} - Shell.run(f"mkdir -p {self.LOCAL_DIR}") + Shell.check(f"mkdir -p {self.LOCAL_DIR}") for package_type in self.PACKAGE_TYPES: for package in self.package_names: @@ -474,7 +488,7 @@ class PackageDownloader: return res def run(self): - Shell.run(f"rm -rf {self.LOCAL_DIR}/*") + Shell.check(f"rm -rf {self.LOCAL_DIR}/*") for package_file in ( self.deb_package_files + self.rpm_package_files + self.tgz_package_files ): @@ -549,33 +563,33 @@ class PackageDownloader: @contextmanager def checkout(ref: str) -> Iterator[None]: - orig_ref = Shell.run(f"{GIT_PREFIX} symbolic-ref --short HEAD", check=True) + orig_ref = Shell.get_output_or_raise(f"{GIT_PREFIX} symbolic-ref --short HEAD") rollback_cmd = f"{GIT_PREFIX} checkout {orig_ref}" assert orig_ref if ref not in (orig_ref,): - Shell.run(f"{GIT_PREFIX} checkout {ref}") + Shell.check(f"{GIT_PREFIX} checkout {ref}", strict=True, verbose=True) try: yield except (Exception, KeyboardInterrupt) as e: print(f"ERROR: Exception [{e}]") - Shell.run(rollback_cmd) + Shell.check(rollback_cmd, verbose=True) raise - Shell.run(rollback_cmd) + Shell.check(rollback_cmd, verbose=True) @contextmanager def checkout_new(ref: str) -> Iterator[None]: - orig_ref = Shell.run(f"{GIT_PREFIX} symbolic-ref --short HEAD", check=True) + orig_ref = Shell.get_output_or_raise(f"{GIT_PREFIX} symbolic-ref --short HEAD") rollback_cmd = f"{GIT_PREFIX} checkout {orig_ref}" assert orig_ref - Shell.run(f"{GIT_PREFIX} checkout -b {ref}", check=True) + Shell.check(f"{GIT_PREFIX} checkout -b {ref}", strict=True, verbose=True) try: yield except (Exception, KeyboardInterrupt) as e: print(f"ERROR: Exception [{e}]") - Shell.run(rollback_cmd) + Shell.check(rollback_cmd, verbose=True) raise - Shell.run(rollback_cmd) + 
Shell.check(rollback_cmd, verbose=True) def parse_args() -> argparse.Namespace: diff --git a/tests/ci/docker_images_helper.py b/tests/ci/docker_images_helper.py index e6869852c4e..f0323145cfa 100644 --- a/tests/ci/docker_images_helper.py +++ b/tests/ci/docker_images_helper.py @@ -19,11 +19,11 @@ def docker_login(relogin: bool = True) -> None: if relogin or not Shell.check( "docker system info | grep --quiet -E 'Username|Registry'" ): - Shell.run( # pylint: disable=unexpected-keyword-arg + Shell.check( # pylint: disable=unexpected-keyword-arg "docker login --username 'robotclickhouse' --password-stdin", - input=get_parameter_from_ssm("dockerhub_robot_password"), + strict=True, + stdin_str=get_parameter_from_ssm("dockerhub_robot_password"), encoding="utf-8", - check=True, ) @@ -42,7 +42,7 @@ class DockerImage: def pull_image(image: DockerImage) -> DockerImage: try: logging.info("Pulling image %s - start", image) - Shell.run(f"docker pull {image}", check=True) + Shell.check(f"docker pull {image}", strict=True) logging.info("Pulling image %s - done", image) except Exception as ex: logging.info("Got exception pulling docker %s", ex) From a6d0b7afbb8299eb8cf056368e93267ef51359ba Mon Sep 17 00:00:00 2001 From: Max K Date: Thu, 1 Aug 2024 18:05:19 +0200 Subject: [PATCH 559/661] recovery option --- .github/workflows/create_release.yml | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/.github/workflows/create_release.yml b/.github/workflows/create_release.yml index c3126abe461..424dfe60be4 100644 --- a/.github/workflows/create_release.yml +++ b/.github/workflows/create_release.yml @@ -16,6 +16,11 @@ concurrency: options: - patch - new + only-repo: + description: 'Run only repos updates including docker (recovery)' + required: false + default: false + type: boolean dry-run: description: 'Dry run' required: false @@ -54,11 +59,12 @@ jobs: run: | python3 ./tests/ci/create_release.py --download-packages ${{ inputs.dry-run == true && '--dry-run' || '' }} - name: Push Git Tag for the Release + if: ${{ ! inputs.only-repo }} shell: bash run: | python3 ./tests/ci/create_release.py --push-release-tag ${{ inputs.dry-run == true && '--dry-run' || '' }} - name: Push New Release Branch - if: ${{ inputs.type == 'new' }} + if: ${{ inputs.type == 'new' && ! inputs.only-repo }} shell: bash run: | python3 ./tests/ci/create_release.py --push-new-release-branch ${{ inputs.dry-run == true && '--dry-run' || '' }} @@ -67,7 +73,7 @@ jobs: # run: | # python3 ./tests/ci/create_release.py --create-bump-version-pr ${{ inputs.dry-run == true && '--dry-run' || '' }} - name: Bump Docker versions, Changelog, Security - if: ${{ inputs.type == 'patch' }} + if: ${{ inputs.type == 'patch' && ! inputs.only-repo }} shell: bash run: | python3 ./tests/ci/create_release.py --set-progress-started --progress "update changelog, docker version, security" @@ -87,7 +93,7 @@ jobs: python3 ./utils/security-generator/generate_security.py > SECURITY.md git diff HEAD - name: Create ChangeLog PR - if: ${{ inputs.type == 'patch' && ! inputs.dry-run }} + if: ${{ inputs.type == 'patch' && ! inputs.dry-run && ! inputs.only-repo }} uses: peter-evans/create-pull-request@v6 with: author: "robot-clickhouse " @@ -105,7 +111,7 @@ jobs: ### Changelog category (leave one): - Not for changelog (changelog entry is not required) - name: Complete previous steps and Restore git state - if: ${{ inputs.type == 'patch' }} + if: ${{ inputs.type == 'patch' && ! 
inputs.only-repo }} shell: bash run: | python3 ./tests/ci/create_release.py --set-progress-completed From dab5eb9c24cc2f43a0ad8ee65ecac613896cff10 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 1 Aug 2024 16:16:34 +0000 Subject: [PATCH 560/661] Fix INTERPOLATE by constant. Fix other tests. --- src/Analyzer/InterpolateNode.cpp | 2 +- src/Analyzer/InterpolateNode.h | 2 ++ src/Analyzer/Resolve/QueryAnalyzer.cpp | 13 ++++------- src/Planner/CollectTableExpressionData.cpp | 2 +- src/Planner/Planner.cpp | 22 +++++++++++++++++++ src/Planner/PlannerExpressionAnalysis.cpp | 3 +++ src/Processors/QueryPlan/FillingStep.cpp | 14 +++++++++++- ..._no_aggregates_and_constant_keys.reference | 4 ++-- ...15_analyzer_materialized_constants_bug.sql | 2 +- 9 files changed, 49 insertions(+), 15 deletions(-) diff --git a/src/Analyzer/InterpolateNode.cpp b/src/Analyzer/InterpolateNode.cpp index 97dc79f565b..17c734cf386 100644 --- a/src/Analyzer/InterpolateNode.cpp +++ b/src/Analyzer/InterpolateNode.cpp @@ -24,7 +24,7 @@ void InterpolateNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_st { buffer << std::string(indent, ' ') << "INTERPOLATE id: " << format_state.getNodeId(this); - buffer << '\n' << std::string(indent + 2, ' ') << "EXPRESSION\n"; + buffer << '\n' << std::string(indent + 2, ' ') << "EXPRESSION " << expression_name << " \n"; getExpression()->dumpTreeImpl(buffer, format_state, indent + 4); buffer << '\n' << std::string(indent + 2, ' ') << "INTERPOLATE_EXPRESSION\n"; diff --git a/src/Analyzer/InterpolateNode.h b/src/Analyzer/InterpolateNode.h index ec493ed8bdd..eb3d64d7170 100644 --- a/src/Analyzer/InterpolateNode.h +++ b/src/Analyzer/InterpolateNode.h @@ -50,6 +50,8 @@ public: return QueryTreeNodeType::INTERPOLATE; } + const std::string & getExpressionName() const { return expression_name; } + void dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const override; protected: diff --git a/src/Analyzer/Resolve/QueryAnalyzer.cpp b/src/Analyzer/Resolve/QueryAnalyzer.cpp index 767d5c11075..e973bd8fb34 100644 --- a/src/Analyzer/Resolve/QueryAnalyzer.cpp +++ b/src/Analyzer/Resolve/QueryAnalyzer.cpp @@ -64,6 +64,8 @@ #include #include +#include + #include namespace ProfileEvents @@ -4122,11 +4124,7 @@ void QueryAnalyzer::resolveInterpolateColumnsNodeList(QueryTreeNodePtr & interpo { auto & interpolate_node_typed = interpolate_node->as(); - auto * column_to_interpolate = interpolate_node_typed.getExpression()->as(); - if (!column_to_interpolate) - throw Exception(ErrorCodes::LOGICAL_ERROR, "INTERPOLATE can work only for indentifiers, but {} is found", - interpolate_node_typed.getExpression()->formatASTForErrorMessage()); - auto column_to_interpolate_name = column_to_interpolate->getIdentifier().getFullName(); + auto column_to_interpolate_name = interpolate_node_typed.getExpressionName(); resolveExpressionNode(interpolate_node_typed.getExpression(), scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/); @@ -4135,14 +4133,11 @@ void QueryAnalyzer::resolveInterpolateColumnsNodeList(QueryTreeNodePtr & interpo auto & interpolation_to_resolve = interpolate_node_typed.getInterpolateExpression(); IdentifierResolveScope interpolate_scope(interpolation_to_resolve, &scope /*parent_scope*/); - auto fake_column_node = std::make_shared(NameAndTypePair(column_to_interpolate_name, interpolate_node_typed.getExpression()->getResultType()), interpolate_node_typed.getExpression()); + auto fake_column_node = 
std::make_shared(NameAndTypePair(column_to_interpolate_name, interpolate_node_typed.getExpression()->getResultType()), interpolate_node); if (is_column_constant) interpolate_scope.expression_argument_name_to_node.emplace(column_to_interpolate_name, fake_column_node); resolveExpressionNode(interpolation_to_resolve, interpolate_scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/); - - if (is_column_constant) - interpolation_to_resolve = interpolation_to_resolve->cloneAndReplace(fake_column_node, interpolate_node_typed.getExpression()); } } diff --git a/src/Planner/CollectTableExpressionData.cpp b/src/Planner/CollectTableExpressionData.cpp index 2fe62aa9be0..c48813a4ed4 100644 --- a/src/Planner/CollectTableExpressionData.cpp +++ b/src/Planner/CollectTableExpressionData.cpp @@ -46,7 +46,7 @@ public: auto column_source_node = column_node->getColumnSource(); auto column_source_node_type = column_source_node->getNodeType(); - if (column_source_node_type == QueryTreeNodeType::LAMBDA) + if (column_source_node_type == QueryTreeNodeType::LAMBDA || column_source_node_type == QueryTreeNodeType::INTERPOLATE) return; /// JOIN using expression diff --git a/src/Planner/Planner.cpp b/src/Planner/Planner.cpp index 968642dc9de..b837d9428a1 100644 --- a/src/Planner/Planner.cpp +++ b/src/Planner/Planner.cpp @@ -744,6 +744,8 @@ void addWithFillStepIfNeeded(QueryPlan & query_plan, } else { + ActionsDAG rename_dag; + for (auto & interpolate_node : interpolate_list_nodes) { auto & interpolate_node_typed = interpolate_node->as(); @@ -772,8 +774,28 @@ void addWithFillStepIfNeeded(QueryPlan & query_plan, const auto * alias_node = &interpolate_actions_dag.addAlias(*interpolate_expression, expression_to_interpolate_name); interpolate_actions_dag.getOutputs().push_back(alias_node); + + /// Here we fix INTERPOLATE by constant expression. + /// Example from 02336_sort_optimization_with_fill: + /// + /// SELECT 5 AS x, 'Hello' AS s ORDER BY x WITH FILL FROM 1 TO 10 INTERPOLATE (s AS s||'A') + /// + /// For this query, INTERPOLATE_EXPRESSION would be : s AS concat(s, 'A'), + /// so that interpolate_actions_dag would have INPUT `s`. + /// + /// However, INPUT `s` does not exist. Instead, we have a constant with execution name 'Hello'_String. 
+ /// To fix this, we prepend a rename : 'Hello'_String -> s + if (const auto * constant_node = interpolate_node_typed.getExpression()->as()) + { + const auto * node = &rename_dag.addInput(alias_node->result_name, alias_node->result_type); + node = &rename_dag.addAlias(*node, interpolate_node_typed.getExpressionName()); + rename_dag.getOutputs().push_back(node); + } } + if (!rename_dag.getOutputs().empty()) + interpolate_actions_dag = ActionsDAG::merge(std::move(rename_dag), std::move(interpolate_actions_dag)); + interpolate_actions_dag.removeUnusedActions(); } diff --git a/src/Planner/PlannerExpressionAnalysis.cpp b/src/Planner/PlannerExpressionAnalysis.cpp index 2b67c96d843..ed3f78193ee 100644 --- a/src/Planner/PlannerExpressionAnalysis.cpp +++ b/src/Planner/PlannerExpressionAnalysis.cpp @@ -462,6 +462,9 @@ SortAnalysisResult analyzeSort(const QueryNode & query_node, for (auto & interpolate_node : interpolate_list_node.getNodes()) { auto & interpolate_node_typed = interpolate_node->as(); + if (interpolate_node_typed.getExpression()->getNodeType() == QueryTreeNodeType::CONSTANT) + continue; + interpolate_actions_visitor.visit(interpolate_actions_dag, interpolate_node_typed.getInterpolateExpression()); } diff --git a/src/Processors/QueryPlan/FillingStep.cpp b/src/Processors/QueryPlan/FillingStep.cpp index 81622389ada..8687886447a 100644 --- a/src/Processors/QueryPlan/FillingStep.cpp +++ b/src/Processors/QueryPlan/FillingStep.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include namespace DB @@ -58,14 +59,25 @@ void FillingStep::transformPipeline(QueryPipelineBuilder & pipeline, const Build void FillingStep::describeActions(FormatSettings & settings) const { - settings.out << String(settings.offset, ' '); + String prefix(settings.offset, settings.indent_char); + settings.out << prefix; dumpSortDescription(sort_description, settings.out); settings.out << '\n'; + if (interpolate_description) + { + auto expression = std::make_shared(interpolate_description->actions.clone()); + expression->describeActions(settings.out, prefix); + } } void FillingStep::describeActions(JSONBuilder::JSONMap & map) const { map.add("Sort Description", explainSortDescription(sort_description)); + if (interpolate_description) + { + auto expression = std::make_shared(interpolate_description->actions.clone()); + map.add("Expression", expression->toTree()); + } } void FillingStep::updateOutputStream() diff --git a/tests/queries/0_stateless/00257_shard_no_aggregates_and_constant_keys.reference b/tests/queries/0_stateless/00257_shard_no_aggregates_and_constant_keys.reference index 63b8a9d14fc..fc77ed8a241 100644 --- a/tests/queries/0_stateless/00257_shard_no_aggregates_and_constant_keys.reference +++ b/tests/queries/0_stateless/00257_shard_no_aggregates_and_constant_keys.reference @@ -8,13 +8,13 @@ 40 41 -0 +41 2 42 2 42 43 -0 +43 11 11 diff --git a/tests/queries/0_stateless/03215_analyzer_materialized_constants_bug.sql b/tests/queries/0_stateless/03215_analyzer_materialized_constants_bug.sql index f9ec28d09d8..b2fd69d75d0 100644 --- a/tests/queries/0_stateless/03215_analyzer_materialized_constants_bug.sql +++ b/tests/queries/0_stateless/03215_analyzer_materialized_constants_bug.sql @@ -20,7 +20,7 @@ WITH ( SELECT coalesce(materialize(toLowCardinality(toNullable(1))), 10, NULL), max(v) -FROM remote('127.0.0.{1,2}', default, test__fuzz_21) +FROM remote('127.0.0.{1,2}', currentDatabase(), test__fuzz_21) GROUP BY coalesce(NULL), coalesce(1, 10, 10, materialize(NULL)); From d683fb05a009ed3f58c0e11fc329c3783f934369 
Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 1 Aug 2024 19:05:30 +0200 Subject: [PATCH 561/661] Fix --- .../IO/CachedOnDiskReadBufferFromFile.cpp | 17 ++++++--- src/Interpreters/Cache/FileCache.cpp | 35 ++++++++++++------- src/Interpreters/Cache/FileCache.h | 4 ++- src/Interpreters/Cache/FileSegment.cpp | 9 ++++- src/Interpreters/Cache/FileSegment.h | 4 ++- tests/config/config.d/storage_conf.xml | 3 +- 6 files changed, 51 insertions(+), 21 deletions(-) diff --git a/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp b/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp index c928d25c7b8..b471f3fc58f 100644 --- a/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp +++ b/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp @@ -135,8 +135,11 @@ bool CachedOnDiskReadBufferFromFile::nextFileSegmentsBatch() else { CreateFileSegmentSettings create_settings(FileSegmentKind::Regular); - file_segments = cache->getOrSet(cache_key, file_offset_of_buffer_end, size, file_size.value(), create_settings, settings.filesystem_cache_segments_batch_size, user); + file_segments = cache->getOrSet( + cache_key, file_offset_of_buffer_end, size, file_size.value(), + create_settings, settings.filesystem_cache_segments_batch_size, user); } + return !file_segments->empty(); } @@ -158,8 +161,8 @@ void CachedOnDiskReadBufferFromFile::initialize() LOG_TEST( log, - "Having {} file segments to read: {}, current offset: {}", - file_segments->size(), file_segments->toString(), file_offset_of_buffer_end); + "Having {} file segments to read: {}, current read range: [{}, {})", + file_segments->size(), file_segments->toString(), file_offset_of_buffer_end, read_until_position); initialized = true; } @@ -1043,6 +1046,10 @@ bool CachedOnDiskReadBufferFromFile::nextImplStep() if (file_segments->size() == 1) { size_t remaining_size_to_read = std::min(current_read_range.right, read_until_position - 1) - file_offset_of_buffer_end + 1; + + LOG_TEST(log, "Remaining size to read: {}, read: {}. 
Resizing buffer to {}", + remaining_size_to_read, size, nextimpl_working_buffer_offset + std::min(size, remaining_size_to_read)); + size = std::min(size, remaining_size_to_read); chassert(implementation_buffer->buffer().size() >= nextimpl_working_buffer_offset + size); implementation_buffer->buffer().resize(nextimpl_working_buffer_offset + size); @@ -1055,8 +1062,8 @@ bool CachedOnDiskReadBufferFromFile::nextImplStep() chassert( file_offset_of_buffer_end <= read_until_position, - fmt::format("Expected {} <= {} (size: {}, read range: {})", - file_offset_of_buffer_end, read_until_position, size, current_read_range.toString())); + fmt::format("Expected {} <= {} (size: {}, read range: {}, hold file segments: {} ({}))", + file_offset_of_buffer_end, read_until_position, size, current_read_range.toString(), file_segments->size(), file_segments->toString(true))); } swap(*implementation_buffer); diff --git a/src/Interpreters/Cache/FileCache.cpp b/src/Interpreters/Cache/FileCache.cpp index a88c0de2cfe..0a03f5dcc7d 100644 --- a/src/Interpreters/Cache/FileCache.cpp +++ b/src/Interpreters/Cache/FileCache.cpp @@ -316,14 +316,14 @@ FileSegments FileCache::getImpl(const LockedKey & locked_key, const FileSegment: return result; } -std::vector FileCache::splitRange(size_t offset, size_t size) +std::vector FileCache::splitRange(size_t offset, size_t size, size_t aligned_size) { assert(size > 0); std::vector ranges; size_t current_pos = offset; size_t end_pos_non_included = offset + size; - size_t remaining_size = size; + size_t remaining_size = aligned_size; FileSegments file_segments; const size_t max_size = max_file_segment_size.load(); @@ -343,17 +343,20 @@ FileSegments FileCache::splitRangeIntoFileSegments( LockedKey & locked_key, size_t offset, size_t size, + size_t aligned_size, FileSegment::State state, size_t file_segments_limit, const CreateFileSegmentSettings & create_settings) { - assert(size > 0); + chassert(size > 0); + chassert(size <= aligned_size); + /// We take `size` as a soft limit and `aligned_size` as a hard limit. 
auto current_pos = offset; auto end_pos_non_included = offset + size; size_t current_file_segment_size; - size_t remaining_size = size; + size_t remaining_size = aligned_size; FileSegments file_segments; const size_t max_size = max_file_segment_size.load(); @@ -369,6 +372,8 @@ FileSegments FileCache::splitRangeIntoFileSegments( current_pos += current_file_segment_size; } + chassert(file_segments.size() == file_segments_limit || file_segments.back()->range().contains(offset + size - 1), + fmt::format("Offset: {}, size: {}, file segments: {}", offset, size, toString(file_segments))); return file_segments; } @@ -376,6 +381,7 @@ void FileCache::fillHolesWithEmptyFileSegments( LockedKey & locked_key, FileSegments & file_segments, const FileSegment::Range & range, + size_t non_aligned_right_offset, size_t file_segments_limit, bool fill_with_detached_file_segments, const CreateFileSegmentSettings & create_settings) @@ -442,7 +448,7 @@ void FileCache::fillHolesWithEmptyFileSegments( } else { - auto ranges = splitRange(current_pos, hole_size); + auto ranges = splitRange(current_pos, hole_size, hole_size); FileSegments hole; for (const auto & r : ranges) { @@ -479,7 +485,7 @@ void FileCache::fillHolesWithEmptyFileSegments( chassert(!file_segments_limit || file_segments.size() < file_segments_limit); - if (current_pos <= range.right) + if (current_pos <= non_aligned_right_offset) { /// ________] -- requested range /// _____] @@ -487,6 +493,7 @@ void FileCache::fillHolesWithEmptyFileSegments( /// segmentN auto hole_size = range.right - current_pos + 1; + auto non_aligned_size = non_aligned_right_offset - current_pos + 1; if (fill_with_detached_file_segments) { @@ -497,7 +504,7 @@ void FileCache::fillHolesWithEmptyFileSegments( } else { - auto ranges = splitRange(current_pos, hole_size); + auto ranges = splitRange(current_pos, non_aligned_size, hole_size); FileSegments hole; for (const auto & r : ranges) { @@ -542,7 +549,7 @@ FileSegmentsHolderPtr FileCache::set( else { file_segments = splitRangeIntoFileSegments( - *locked_key, offset, size, FileSegment::State::EMPTY, /* file_segments_limit */0, create_settings); + *locked_key, offset, size, size, FileSegment::State::EMPTY, /* file_segments_limit */0, create_settings); } return std::make_unique(std::move(file_segments)); @@ -659,9 +666,13 @@ FileCache::getOrSet( } } + chassert(range.left >= aligned_offset); + if (file_segments.empty()) { - file_segments = splitRangeIntoFileSegments(*locked_key, range.left, range.size(), FileSegment::State::EMPTY, file_segments_limit, create_settings); + file_segments = splitRangeIntoFileSegments( + *locked_key, range.left, /* size */offset + size - range.left, /* aligned_size */range.size(), + FileSegment::State::EMPTY, file_segments_limit, create_settings); } else { @@ -669,9 +680,9 @@ FileCache::getOrSet( chassert(file_segments.back()->range().left <= range.right); fillHolesWithEmptyFileSegments( - *locked_key, file_segments, range, file_segments_limit, /* fill_with_detached */false, create_settings); + *locked_key, file_segments, range, offset + size - 1, file_segments_limit, /* fill_with_detached */false, create_settings); - if (!file_segments.front()->range().contains(offset)) + if (!file_segments.front()->range().contains(range.left)) { throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected {} to include {} " "(end offset: {}, aligned offset: {}, aligned end offset: {})", @@ -713,7 +724,7 @@ FileSegmentsHolderPtr FileCache::get( } fillHolesWithEmptyFileSegments( - *locked_key, file_segments, range, 
file_segments_limit, /* fill_with_detached */true, CreateFileSegmentSettings{}); + *locked_key, file_segments, range, offset + size - 1, file_segments_limit, /* fill_with_detached */true, CreateFileSegmentSettings{}); chassert(!file_segments_limit || file_segments.size() <= file_segments_limit); return std::make_unique(std::move(file_segments)); diff --git a/src/Interpreters/Cache/FileCache.h b/src/Interpreters/Cache/FileCache.h index 527fd9d5edf..3f7eec73b56 100644 --- a/src/Interpreters/Cache/FileCache.h +++ b/src/Interpreters/Cache/FileCache.h @@ -263,7 +263,7 @@ private: /// Split range into subranges by max_file_segment_size, /// each subrange size must be less or equal to max_file_segment_size. - std::vector splitRange(size_t offset, size_t size); + std::vector splitRange(size_t offset, size_t size, size_t aligned_size); /// Split range into subranges by max_file_segment_size (same as in splitRange()) /// and create a new file segment for each subrange. @@ -273,6 +273,7 @@ private: LockedKey & locked_key, size_t offset, size_t size, + size_t aligned_size, FileSegment::State state, size_t file_segments_limit, const CreateFileSegmentSettings & create_settings); @@ -281,6 +282,7 @@ private: LockedKey & locked_key, FileSegments & file_segments, const FileSegment::Range & range, + size_t non_aligned_right_offset, size_t file_segments_limit, bool fill_with_detached_file_segments, const CreateFileSegmentSettings & settings); diff --git a/src/Interpreters/Cache/FileSegment.cpp b/src/Interpreters/Cache/FileSegment.cpp index 1664a91b694..c46fb978ae4 100644 --- a/src/Interpreters/Cache/FileSegment.cpp +++ b/src/Interpreters/Cache/FileSegment.cpp @@ -1008,7 +1008,12 @@ FileSegment & FileSegmentsHolder::add(FileSegmentPtr && file_segment) return *file_segments.back(); } -String FileSegmentsHolder::toString() +String FileSegmentsHolder::toString(bool with_state) +{ + return DB::toString(file_segments, with_state); +} + +String toString(const FileSegments & file_segments, bool with_state) { String ranges; for (const auto & file_segment : file_segments) @@ -1018,6 +1023,8 @@ String FileSegmentsHolder::toString() ranges += file_segment->range().toString(); if (file_segment->isUnbound()) ranges += "(unbound)"; + if (with_state) + ranges += "(" + FileSegment::stateToString(file_segment->state()) + ")"; } return ranges; } diff --git a/src/Interpreters/Cache/FileSegment.h b/src/Interpreters/Cache/FileSegment.h index d6b37b60dc1..25ffb880b45 100644 --- a/src/Interpreters/Cache/FileSegment.h +++ b/src/Interpreters/Cache/FileSegment.h @@ -291,7 +291,7 @@ struct FileSegmentsHolder : private boost::noncopyable size_t size() const { return file_segments.size(); } - String toString(); + String toString(bool with_state = false); void popFront() { completeAndPopFrontImpl(); } @@ -317,4 +317,6 @@ private: using FileSegmentsHolderPtr = std::unique_ptr; +String toString(const FileSegments & file_segments, bool with_state = false); + } diff --git a/tests/config/config.d/storage_conf.xml b/tests/config/config.d/storage_conf.xml index 7a9b579c00a..4daa64b520d 100644 --- a/tests/config/config.d/storage_conf.xml +++ b/tests/config/config.d/storage_conf.xml @@ -19,7 +19,8 @@ cache s3_disk s3_cache/ - 104857600 + 100Mi + 5Mi 1 100 LRU From 1e8d0d4a5e8d83a1d123a4b5b6c5a91b41caac1c Mon Sep 17 00:00:00 2001 From: Julia Kartseva Date: Wed, 31 Jul 2024 06:09:14 +0000 Subject: [PATCH 562/661] disable parallel run for network_receive_time_metric_insert If run in parallel, several tests may affect the value of the 
`NetworkReceiveElapsedMicroseconds` profile event. This may contribute to test flakiness. --- .../0_stateless/01923_network_receive_time_metric_insert.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01923_network_receive_time_metric_insert.sh b/tests/queries/0_stateless/01923_network_receive_time_metric_insert.sh index 97835d97965..77b909ed89e 100755 --- a/tests/queries/0_stateless/01923_network_receive_time_metric_insert.sh +++ b/tests/queries/0_stateless/01923_network_receive_time_metric_insert.sh @@ -1,6 +1,7 @@ #!/usr/bin/env bash -# Tags: no-fasttest +# Tags: no-fasttest, no-parallel # Tag no-fasttest: needs pv +# Tag no-parallel: reads from a system table CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh From 4709222dd1f3a37c5f97e638526c21ade6b5218f Mon Sep 17 00:00:00 2001 From: Julia Kartseva Date: Wed, 31 Jul 2024 23:16:43 +0000 Subject: [PATCH 563/661] print debug info if the test fails --- ...1923_network_receive_time_metric_insert.sh | 22 +++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/tests/queries/0_stateless/01923_network_receive_time_metric_insert.sh b/tests/queries/0_stateless/01923_network_receive_time_metric_insert.sh index 77b909ed89e..adf4fd96a00 100755 --- a/tests/queries/0_stateless/01923_network_receive_time_metric_insert.sh +++ b/tests/queries/0_stateless/01923_network_receive_time_metric_insert.sh @@ -13,9 +13,23 @@ ${CLICKHOUSE_CLIENT} --query "DROP TABLE IF EXISTS t; CREATE TABLE t (x UInt64) seq 1 1000 | pv --quiet --rate-limit 400 | ${CLICKHOUSE_CLIENT} --query "INSERT INTO t FORMAT TSV" # We check that the value of NetworkReceiveElapsedMicroseconds correctly includes the time spent waiting data from the client. -${CLICKHOUSE_CLIENT} --query "SYSTEM FLUSH LOGS; - WITH ProfileEvents['NetworkReceiveElapsedMicroseconds'] AS time - SELECT time >= 1000000 ? 
1 : time FROM system.query_log - WHERE current_database = currentDatabase() AND query_kind = 'Insert' AND event_date >= yesterday() AND type = 2 ORDER BY event_time DESC LIMIT 1;" +result=$(${CLICKHOUSE_CLIENT} --query "SYSTEM FLUSH LOGS; + WITH ProfileEvents['NetworkReceiveElapsedMicroseconds'] AS elapsed_us + SELECT elapsed_us FROM system.query_log + WHERE current_database = currentDatabase() AND query_kind = 'Insert' AND event_date >= yesterday() AND type = 'QueryFinish' + ORDER BY event_time DESC LIMIT 1;") + +elapsed_us=$(echo $result | sed 's/ .*//') + +min_elapsed_us=1000000 +if [[ "$elapsed_us" -ge "$min_elapsed_us" ]]; then + echo 1 +else + # Print debug info + ${CLICKHOUSE_CLIENT} --query " + WITH ProfileEvents['NetworkReceiveElapsedMicroseconds'] AS elapsed_us + SELECT query_start_time_microseconds, event_time_microseconds, query_duration_ms, elapsed_us, query FROM system.query_log + WHERE current_database = currentDatabase() and event_date >= yesterday() AND type = 'QueryFinish' ORDER BY query_start_time;" +fi ${CLICKHOUSE_CLIENT} --query "DROP TABLE t" From 30e0c1a1b8479e9b6be0701ba21e6050906a7e43 Mon Sep 17 00:00:00 2001 From: Max K Date: Thu, 1 Aug 2024 18:46:09 +0200 Subject: [PATCH 564/661] try less mem for geesefs --- .github/workflows/create_release.yml | 21 +++++++++++---------- tests/ci/artifactory.py | 8 +++++--- 2 files changed, 16 insertions(+), 13 deletions(-) diff --git a/.github/workflows/create_release.yml b/.github/workflows/create_release.yml index 424dfe60be4..3c61fa4cfe1 100644 --- a/.github/workflows/create_release.yml +++ b/.github/workflows/create_release.yml @@ -17,7 +17,7 @@ concurrency: - patch - new only-repo: - description: 'Run only repos updates including docker (recovery)' + description: 'Run only repos updates including docker (repo-recovery, tests)' required: false default: false type: boolean @@ -68,10 +68,11 @@ jobs: shell: bash run: | python3 ./tests/ci/create_release.py --push-new-release-branch ${{ inputs.dry-run == true && '--dry-run' || '' }} -# - name: Bump CH Version and Update Contributors' List -# shell: bash -# run: | -# python3 ./tests/ci/create_release.py --create-bump-version-pr ${{ inputs.dry-run == true && '--dry-run' || '' }} + - name: Bump CH Version and Update Contributors' List + if: ${{ ! inputs.only-repo }} + shell: bash + run: | + python3 ./tests/ci/create_release.py --create-bump-version-pr ${{ inputs.dry-run == true && '--dry-run' || '' }} - name: Bump Docker versions, Changelog, Security if: ${{ inputs.type == 'patch' && ! inputs.only-repo }} shell: bash @@ -117,11 +118,11 @@ jobs: python3 ./tests/ci/create_release.py --set-progress-completed git reset --hard HEAD git checkout "$GITHUB_REF_NAME" -# - name: Create GH Release -# shell: bash -# if: ${{ inputs.type == 'patch' }} -# run: | -# python3 ./tests/ci/create_release.py --create-gh-release ${{ inputs.dry-run == true && '--dry-run' || '' }} + - name: Create GH Release + if: ${{ inputs.type == 'patch' && ! 
inputs.only-repo }} + shell: bash + run: | + python3 ./tests/ci/create_release.py --create-gh-release ${{ inputs.dry-run == true && '--dry-run' || '' }} - name: Export TGZ Packages if: ${{ inputs.type == 'patch' }} shell: bash diff --git a/tests/ci/artifactory.py b/tests/ci/artifactory.py index 71deaccf917..8bba7bca30e 100644 --- a/tests/ci/artifactory.py +++ b/tests/ci/artifactory.py @@ -67,8 +67,8 @@ class R2MountPoint: f" --shared-config=/home/ubuntu/.r2_auth_test " ) if self.DEBUG: - self.aux_mount_options += " --debug_s3 --debug_fuse " - self.mount_cmd = f"geesefs --endpoint={self.API_ENDPOINT} --cheap --memory-limit=2050 --gc-interval=100 --max-flushers=5 --max-parallel-parts=1 --max-parallel-copy=2 --log-file={self.LOG_FILE} {self.aux_mount_options} {self.bucket_name} {self.MOUNT_POINT}" + self.aux_mount_options += " --debug_s3 " + self.mount_cmd = f"geesefs --endpoint={self.API_ENDPOINT} --cheap --memory-limit=1000 --gc-interval=100 --max-flushers=10 --max-parallel-parts=1 --max-parallel-copy=10 --log-file={self.LOG_FILE} {self.aux_mount_options} {self.bucket_name} {self.MOUNT_POINT}" else: assert False @@ -207,8 +207,10 @@ class RpmArtifactory: for package in paths: _copy_if_not_exists(Path(package), dest_dir) + # switching between different fuse providers invalidates --update option (apparently some fuse(s) can mess around with mtime) + # add --skip-stat to skip mtime check commands = ( - f"createrepo_c --local-sqlite --workers=2 --update --verbose {dest_dir}", + f"createrepo_c --local-sqlite --workers=2 --update --skip-stat --verbose {dest_dir}", f"gpg --sign-with {self._SIGN_KEY} --detach-sign --batch --yes --armor {dest_dir / 'repodata' / 'repomd.xml'}", ) print(f"Exporting RPM packages into [{codename}]") From e034558f74a1cd46bb8fbdfac3b7dc6d25165f4e Mon Sep 17 00:00:00 2001 From: Max K Date: Thu, 1 Aug 2024 20:51:36 +0200 Subject: [PATCH 565/661] add automerge prs step --- .github/workflows/create_release.yml | 4 +++ pyproject.toml | 1 + tests/ci/ci_utils.py | 2 +- tests/ci/create_release.py | 47 +++++++++++++++++++++++++++- tests/ci/release.py | 1 + 5 files changed, 53 insertions(+), 2 deletions(-) diff --git a/.github/workflows/create_release.yml b/.github/workflows/create_release.yml index 3c61fa4cfe1..e27db1b09a4 100644 --- a/.github/workflows/create_release.yml +++ b/.github/workflows/create_release.yml @@ -171,6 +171,10 @@ jobs: export CHECK_NAME="Docker keeper image" python3 docker_server.py --release-type auto --version ${{ env.RELEASE_TAG }} --check-name "$CHECK_NAME" --sha ${{ env.COMMIT_SHA }} ${{ ! inputs.dry-run && '--push' || '' }} python3 ./create_release.py --set-progress-completed + - name: Update release info. Merge created PRs + shell: bash + run: | + python3 ./tests/ci/create_release.py --merge-prs ${{ inputs.dry-run == true && '--dry-run' || '' }} - name: Set current Release progress to Completed with OK shell: bash run: | diff --git a/pyproject.toml b/pyproject.toml index 9bbeac3ddae..4268901e7f9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -40,6 +40,7 @@ disable = ''' global-statement, f-string-without-interpolation, consider-using-with, + use-maxsplit-arg, ''' [tool.pylint.SIMILARITIES] diff --git a/tests/ci/ci_utils.py b/tests/ci/ci_utils.py index cd21554788c..4f696a2c55a 100644 --- a/tests/ci/ci_utils.py +++ b/tests/ci/ci_utils.py @@ -228,7 +228,7 @@ class Shell: ): if dry_run: print(f"Dry-ryn. 
Would run command [{command}]") - return 0 + return True if verbose: print(f"Run command [{command}]") proc = subprocess.Popen( diff --git a/tests/ci/create_release.py b/tests/ci/create_release.py index 0d505d6ccc7..c407a74fbf0 100755 --- a/tests/ci/create_release.py +++ b/tests/ci/create_release.py @@ -43,6 +43,7 @@ class ReleaseProgress: TEST_TGZ = "test TGZ packages" TEST_RPM = "test RPM packages" TEST_DEB = "test DEB packages" + MERGE_CREATED_PRS = "merge created PRs" COMPLETED = "completed" @@ -101,6 +102,7 @@ class ReleaseInfo: previous_release_sha: str changelog_pr: str = "" version_bump_pr: str = "" + prs_merged: bool = False release_url: str = "" debian_command: str = "" rpm_command: str = "" @@ -380,6 +382,38 @@ class ReleaseInfo: self.release_url = f"dry-run" self.dump() + def merge_prs(self, dry_run: bool) -> None: + repo = CI.Envs.GITHUB_REPOSITORY + assert self.version_bump_pr + if dry_run: + version_bump_pr_num = 12345 + else: + version_bump_pr_num = int(self.version_bump_pr.split("/")[-1]) + print("Merging Version bump PR") + res_1 = Shell.check( + f"gh pr merge {version_bump_pr_num} --repo {repo} --merge --auto", + verbose=True, + dry_run=dry_run, + ) + + res_2 = True + if not self.release_tag.endswith("-new"): + assert self.changelog_pr + print("Merging ChangeLog PR") + if dry_run: + changelog_pr_num = 23456 + else: + changelog_pr_num = int(self.changelog_pr.split("/")[-1]) + res_2 = Shell.check( + f"gh pr merge {changelog_pr_num} --repo {repo} --merge --auto", + verbose=True, + dry_run=dry_run, + ) + else: + assert not self.changelog_pr + + self.prs_merged = res_1 and res_2 + class RepoTypes: RPM = "rpm" @@ -627,6 +661,11 @@ def parse_args() -> argparse.Namespace: action="store_true", help="Create GH Release object and attach all packages", ) + parser.add_argument( + "--merge-prs", + action="store_true", + help="Merge PRs with version, changelog updates", + ) parser.add_argument( "--post-status", action="store_true", @@ -732,7 +771,6 @@ if __name__ == "__main__": if args.post_status: release_info = ReleaseInfo.from_file() - release_info.update_release_info(dry_run=args.dry_run) if release_info.is_new_release_branch(): title = "New release branch" else: @@ -766,6 +804,13 @@ if __name__ == "__main__": ri.progress_description = ReleaseProgressDescription.OK ri.dump() + if args.merge_prs: + with ReleaseContextManager( + release_progress=ReleaseProgress.MERGE_CREATED_PRS + ) as release_info: + release_info.update_release_info(dry_run=args.dry_run) + release_info.merge_prs(dry_run=args.dry_run) + # tear down ssh if _ssh_agent and _key_pub: _ssh_agent.remove(_key_pub) diff --git a/tests/ci/release.py b/tests/ci/release.py index 2de20d00a00..b26d6205f3b 100755 --- a/tests/ci/release.py +++ b/tests/ci/release.py @@ -689,4 +689,5 @@ def main(): if __name__ == "__main__": + assert False, "Script Deprecated, ask ci team for help" main() From eac2c9fc3d8a88c1033e0f23e048421ecf4db850 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 31 Jul 2024 17:38:20 +0200 Subject: [PATCH 566/661] Minor change --- src/Databases/DatabaseOnDisk.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Databases/DatabaseOnDisk.cpp b/src/Databases/DatabaseOnDisk.cpp index f419f5811a1..734f354d9a5 100644 --- a/src/Databases/DatabaseOnDisk.cpp +++ b/src/Databases/DatabaseOnDisk.cpp @@ -313,7 +313,7 @@ void DatabaseOnDisk::detachTablePermanently(ContextPtr query_context, const Stri std::lock_guard lock(mutex); if (const auto it = snapshot_detached_tables.find(table_name); it == 
snapshot_detached_tables.end()) { - throw Exception(ErrorCodes::LOGICAL_ERROR, "Snapshot doesn't contain info about detached table={}", table_name); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Snapshot doesn't contain info about detached table `{}`", table_name); } else { From 69bd306a445a6bc8a55be14bb0080864921f8b69 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 2 Aug 2024 00:48:25 +0200 Subject: [PATCH 567/661] Fix race condition in system.processes and Settings --- src/Backups/RestoreCoordinationRemote.cpp | 2 +- src/Backups/RestoreCoordinationRemote.h | 2 -- src/Databases/DatabaseLazy.cpp | 2 +- src/Databases/DatabaseLazy.h | 2 +- src/Interpreters/ProcessList.cpp | 2 +- 5 files changed, 4 insertions(+), 6 deletions(-) diff --git a/src/Backups/RestoreCoordinationRemote.cpp b/src/Backups/RestoreCoordinationRemote.cpp index 44214d00be5..0a69bc0eafb 100644 --- a/src/Backups/RestoreCoordinationRemote.cpp +++ b/src/Backups/RestoreCoordinationRemote.cpp @@ -323,7 +323,7 @@ bool RestoreCoordinationRemote::hasConcurrentRestores(const std::atomic return false; bool result = false; - std::string path = zookeeper_path +"/stage"; + std::string path = zookeeper_path + "/stage"; auto holder = with_retries.createRetriesControlHolder("createRootNodes"); holder.retries_ctl.retryLoop( diff --git a/src/Backups/RestoreCoordinationRemote.h b/src/Backups/RestoreCoordinationRemote.h index 9c299865cfa..a3d57e9a4d0 100644 --- a/src/Backups/RestoreCoordinationRemote.h +++ b/src/Backups/RestoreCoordinationRemote.h @@ -61,8 +61,6 @@ private: void createRootNodes(); void removeAllNodes(); - class ReplicatedDatabasesMetadataSync; - /// get_zookeeper will provide a zookeeper client without any fault injection const zkutil::GetZooKeeper get_zookeeper; const String root_zookeeper_path; diff --git a/src/Databases/DatabaseLazy.cpp b/src/Databases/DatabaseLazy.cpp index ca30ee6db15..3fb6d30fcb8 100644 --- a/src/Databases/DatabaseLazy.cpp +++ b/src/Databases/DatabaseLazy.cpp @@ -44,7 +44,7 @@ namespace ErrorCodes DatabaseLazy::DatabaseLazy(const String & name_, const String & metadata_path_, time_t expiration_time_, ContextPtr context_) - : DatabaseOnDisk(name_, metadata_path_, "data/" + escapeForFileName(name_) + "/", "DatabaseLazy (" + name_ + ")", context_) + : DatabaseOnDisk(name_, metadata_path_, std::filesystem::path("data") / escapeForFileName(name_) / "", "DatabaseLazy (" + name_ + ")", context_) , expiration_time(expiration_time_) { } diff --git a/src/Databases/DatabaseLazy.h b/src/Databases/DatabaseLazy.h index 4347649117d..41cfb751141 100644 --- a/src/Databases/DatabaseLazy.h +++ b/src/Databases/DatabaseLazy.h @@ -12,7 +12,7 @@ class DatabaseLazyIterator; class Context; /** Lazy engine of databases. - * Works like DatabaseOrdinary, but stores in memory only cache. + * Works like DatabaseOrdinary, but stores in memory only the cache. * Can be used only with *Log engines. 
*/ class DatabaseLazy final : public DatabaseOnDisk diff --git a/src/Interpreters/ProcessList.cpp b/src/Interpreters/ProcessList.cpp index 271e23a7288..6cb50b310ad 100644 --- a/src/Interpreters/ProcessList.cpp +++ b/src/Interpreters/ProcessList.cpp @@ -657,7 +657,7 @@ QueryStatusInfo QueryStatus::getInfo(bool get_thread_list, bool get_profile_even { if (auto ctx = context.lock()) { - res.query_settings = std::make_shared(ctx->getSettingsRef()); + res.query_settings = std::make_shared(ctx->getSettingsCopy()); res.current_database = ctx->getCurrentDatabase(); } } From 572831f865d66e046d3e507d214e0f5aeae49ad4 Mon Sep 17 00:00:00 2001 From: Julia Kartseva Date: Fri, 2 Aug 2024 00:09:26 +0000 Subject: [PATCH 568/661] async_insert_race_long flakiness fixes 1. Make the test truly asynchronous. The setting `--async_insert_max_data_size 1` leads to data being flushed synchronously for all inserts in this test. This triggers part creation and extra resource consumption. 2. Do not run the `--wait_for_async_insert` query as a background process with a fixed (50ms) sleep time. If the actual execution time is longer than the anticipated delay time, it may lead to excessive process creation. --- tests/queries/0_stateless/02481_async_insert_race_long.sh | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/tests/queries/0_stateless/02481_async_insert_race_long.sh b/tests/queries/0_stateless/02481_async_insert_race_long.sh index b0088017d32..91e6c4960e0 100755 --- a/tests/queries/0_stateless/02481_async_insert_race_long.sh +++ b/tests/queries/0_stateless/02481_async_insert_race_long.sh @@ -7,7 +7,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -export MY_CLICKHOUSE_CLIENT="$CLICKHOUSE_CLIENT --async_insert_busy_timeout_ms 10 --async_insert_max_data_size 1 --async_insert 1" +export MY_CLICKHOUSE_CLIENT="$CLICKHOUSE_CLIENT --async_insert_busy_timeout_min_ms 50 --async_insert_busy_timeout_max_ms 50 --async_insert 1" function insert1() { @@ -29,11 +29,8 @@ function insert3() { local TIMELIMIT=$((SECONDS+$1)) while [ $SECONDS -lt "$TIMELIMIT" ]; do - ${MY_CLICKHOUSE_CLIENT} --insert_keeper_fault_injection_probability=0 --wait_for_async_insert 1 -q "INSERT INTO async_inserts_race VALUES (7, 'g') (8, 'h')" & - sleep 0.05 + ${MY_CLICKHOUSE_CLIENT} --insert_keeper_fault_injection_probability=0 --wait_for_async_insert 1 -q "INSERT INTO async_inserts_race VALUES (7, 'g') (8, 'h')" done - - wait } function select1() From d6da86dad282e6ad176b115d4344944daa8b9756 Mon Sep 17 00:00:00 2001 From: Julia Kartseva Date: Wed, 26 Jun 2024 01:27:47 +0000 Subject: [PATCH 569/661] Store plain_rewritable metadata in a separate layout --- .../CommonPathPrefixKeyGenerator.cpp | 6 +- .../MetadataStorageFromPlainObjectStorage.cpp | 36 ++++-- .../MetadataStorageFromPlainObjectStorage.h | 16 ++- ...torageFromPlainObjectStorageOperations.cpp | 71 +++++++---- ...aStorageFromPlainObjectStorageOperations.h | 14 ++- ...torageFromPlainRewritableObjectStorage.cpp | 119 +++++++++++++++--- ...aStorageFromPlainRewritableObjectStorage.h | 15 ++- .../test_s3_plain_rewritable/test.py | 13 ++ 8 files changed, 225 insertions(+), 65 deletions(-) diff --git a/src/Disks/ObjectStorages/CommonPathPrefixKeyGenerator.cpp b/src/Disks/ObjectStorages/CommonPathPrefixKeyGenerator.cpp index e321c8a3c5a..2a06d56e5c7 100644 --- a/src/Disks/ObjectStorages/CommonPathPrefixKeyGenerator.cpp +++ b/src/Disks/ObjectStorages/CommonPathPrefixKeyGenerator.cpp @@ -19,11 +19,11 @@ 
ObjectStorageKey CommonPathPrefixKeyGenerator::generate(const String & path, boo { const auto & [object_key_prefix, suffix_parts] = getLongestObjectKeyPrefix(path); - auto key = std::filesystem::path(object_key_prefix.empty() ? storage_key_prefix : object_key_prefix); + auto key = std::filesystem::path(object_key_prefix.empty() ? std::string() : object_key_prefix); /// The longest prefix is the same as path, meaning that the path is already mapped. if (suffix_parts.empty()) - return ObjectStorageKey::createAsRelative(std::move(key)); + return ObjectStorageKey::createAsRelative(storage_key_prefix, std::move(key)); /// File and top-level directory paths are mapped as is. if (!is_directory || object_key_prefix.empty()) @@ -39,7 +39,7 @@ ObjectStorageKey CommonPathPrefixKeyGenerator::generate(const String & path, boo key /= getRandomASCIIString(part_size); } - return ObjectStorageKey::createAsRelative(key); + return ObjectStorageKey::createAsRelative(storage_key_prefix, key); } std::tuple> CommonPathPrefixKeyGenerator::getLongestObjectKeyPrefix(const std::string & path) const diff --git a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp index 30111d04d20..3da190c7256 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp +++ b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp @@ -7,6 +7,7 @@ #include #include +#include namespace DB { @@ -79,14 +80,16 @@ std::vector MetadataStorageFromPlainObjectStorage::listDirectory(co object_storage->listObjects(abs_key, files, 0); - return getDirectChildrenOnDisk(abs_key, files, path); + std::unordered_set directories; + getDirectChildrenOnDisk(abs_key, object_storage->getCommonKeyPrefix(), files, path, directories); + return std::vector(std::make_move_iterator(directories.begin()), std::make_move_iterator(directories.end())); } DirectoryIteratorPtr MetadataStorageFromPlainObjectStorage::iterateDirectory(const std::string & path) const { /// Required for MergeTree auto paths = listDirectory(path); - // Prepend path, since iterateDirectory() includes path, unlike listDirectory() + /// Prepend path, since iterateDirectory() includes path, unlike listDirectory() std::for_each(paths.begin(), paths.end(), [&](auto & child) { child = fs::path(path) / child; }); std::vector fs_paths(paths.begin(), paths.end()); return std::make_unique(std::move(fs_paths)); @@ -99,10 +102,13 @@ StoredObjects MetadataStorageFromPlainObjectStorage::getStorageObjects(const std return {StoredObject(object_key.serialize(), path, object_size)}; } -std::vector MetadataStorageFromPlainObjectStorage::getDirectChildrenOnDisk( - const std::string & storage_key, const RelativePathsWithMetadata & remote_paths, const std::string & /* local_path */) const +void MetadataStorageFromPlainObjectStorage::getDirectChildrenOnDisk( + const std::string & storage_key, + const std::string & /* storage_key_perfix */, + const RelativePathsWithMetadata & remote_paths, + const std::string & /* local_path */, + std::unordered_set & result) const { - std::unordered_set duplicates_filter; for (const auto & elem : remote_paths) { const auto & path = elem->relative_path; @@ -111,11 +117,10 @@ std::vector MetadataStorageFromPlainObjectStorage::getDirectChildre /// string::npos is ok. 
const auto slash_pos = path.find('/', child_pos); if (slash_pos == std::string::npos) - duplicates_filter.emplace(path.substr(child_pos)); + result.emplace(path.substr(child_pos)); else - duplicates_filter.emplace(path.substr(child_pos, slash_pos - child_pos)); + result.emplace(path.substr(child_pos, slash_pos - child_pos)); } - return std::vector(std::make_move_iterator(duplicates_filter.begin()), std::make_move_iterator(duplicates_filter.end())); } const IMetadataStorage & MetadataStorageFromPlainObjectStorageTransaction::getStorageForNonTransactionalReads() const @@ -140,7 +145,7 @@ void MetadataStorageFromPlainObjectStorageTransaction::removeDirectory(const std else { addOperation(std::make_unique( - normalizeDirectoryPath(path), *metadata_storage.getPathMap(), object_storage)); + normalizeDirectoryPath(path), *metadata_storage.getPathMap(), object_storage, metadata_storage.getMetadataKeyPrefix())); } } @@ -151,8 +156,13 @@ void MetadataStorageFromPlainObjectStorageTransaction::createDirectory(const std auto normalized_path = normalizeDirectoryPath(path); auto key_prefix = object_storage->generateObjectKeyPrefixForDirectoryPath(normalized_path).serialize(); + chassert(key_prefix.starts_with(object_storage->getCommonKeyPrefix())); auto op = std::make_unique( - std::move(normalized_path), std::move(key_prefix), *metadata_storage.getPathMap(), object_storage); + std::move(normalized_path), + key_prefix.substr(object_storage->getCommonKeyPrefix().size()), + *metadata_storage.getPathMap(), + object_storage, + metadata_storage.getMetadataKeyPrefix()); addOperation(std::move(op)); } @@ -167,7 +177,11 @@ void MetadataStorageFromPlainObjectStorageTransaction::moveDirectory(const std:: throwNotImplemented(); addOperation(std::make_unique( - normalizeDirectoryPath(path_from), normalizeDirectoryPath(path_to), *metadata_storage.getPathMap(), object_storage)); + normalizeDirectoryPath(path_from), + normalizeDirectoryPath(path_to), + *metadata_storage.getPathMap(), + object_storage, + metadata_storage.getMetadataKeyPrefix())); } void MetadataStorageFromPlainObjectStorageTransaction::addBlobToMetadata( diff --git a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.h b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.h index 66da0f2431e..97c5715a937 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.h +++ b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.h @@ -6,6 +6,8 @@ #include #include +#include +#include namespace DB { @@ -78,10 +80,20 @@ public: bool supportsStat() const override { return false; } protected: + /// Get the object storage prefix for storing metadata files. If stored behind a separate endpoint, + /// the metadata keys reflect the layout of the regular files. + virtual std::string getMetadataKeyPrefix() const { return object_storage->getCommonKeyPrefix(); } + + /// Returns a map of local paths to paths in object storage. virtual std::shared_ptr getPathMap() const { throwNotImplemented(); } - virtual std::vector getDirectChildrenOnDisk( - const std::string & storage_key, const RelativePathsWithMetadata & remote_paths, const std::string & local_path) const; + /// Retrieves the immediate files and directories within a given directory on a disk. 
+ virtual void getDirectChildrenOnDisk( + const std::string & storage_key, + const std::string & storage_key_perfix, + const RelativePathsWithMetadata & remote_paths, + const std::string & local_path, + std::unordered_set & result) const; }; class MetadataStorageFromPlainObjectStorageTransaction final : public IMetadataTransaction, private MetadataOperationsHolder diff --git a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.cpp b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.cpp index 7e4b1f69962..0a6086bd39d 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.cpp +++ b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.cpp @@ -20,14 +20,24 @@ namespace constexpr auto PREFIX_PATH_FILE_NAME = "prefix.path"; +ObjectStorageKey createMetadataObjectKey(const std::string & key_prefix, const std::string & metadata_key_prefix) +{ + auto prefix = std::filesystem::path(metadata_key_prefix) / key_prefix; + return ObjectStorageKey::createAsRelative(prefix.string(), PREFIX_PATH_FILE_NAME); +} } MetadataStorageFromPlainObjectStorageCreateDirectoryOperation::MetadataStorageFromPlainObjectStorageCreateDirectoryOperation( std::filesystem::path && path_, std::string && key_prefix_, MetadataStorageFromPlainObjectStorage::PathMap & path_map_, - ObjectStoragePtr object_storage_) - : path(std::move(path_)), key_prefix(key_prefix_), path_map(path_map_), object_storage(object_storage_) + ObjectStoragePtr object_storage_, + const std::string & metadata_key_prefix_) + : path(std::move(path_)) + , key_prefix(key_prefix_) + , path_map(path_map_) + , object_storage(object_storage_) + , metadata_key_prefix(metadata_key_prefix_) { } @@ -36,13 +46,17 @@ void MetadataStorageFromPlainObjectStorageCreateDirectoryOperation::execute(std: if (path_map.contains(path)) return; - LOG_TRACE(getLogger("MetadataStorageFromPlainObjectStorageCreateDirectoryOperation"), "Creating metadata for directory '{}'", path); + auto metadata_object_key = createMetadataObjectKey(key_prefix, metadata_key_prefix); - auto object_key = ObjectStorageKey::createAsRelative(key_prefix, PREFIX_PATH_FILE_NAME); + LOG_TRACE( + getLogger("MetadataStorageFromPlainObjectStorageCreateDirectoryOperation"), + "Creating metadata for directory '{}' with remote path='{}'", + path, + metadata_object_key.serialize()); - auto object = StoredObject(object_key.serialize(), path / PREFIX_PATH_FILE_NAME); + auto metadata_object = StoredObject(metadata_object_key.serialize(), path / PREFIX_PATH_FILE_NAME); auto buf = object_storage->writeObject( - object, + metadata_object, WriteMode::Rewrite, /* object_attributes */ std::nullopt, /* buf_size */ DBMS_DEFAULT_BUFFER_SIZE, @@ -66,25 +80,31 @@ void MetadataStorageFromPlainObjectStorageCreateDirectoryOperation::execute(std: void MetadataStorageFromPlainObjectStorageCreateDirectoryOperation::undo(std::unique_lock &) { - auto object_key = ObjectStorageKey::createAsRelative(key_prefix, PREFIX_PATH_FILE_NAME); + auto metadata_object_key = createMetadataObjectKey(key_prefix, metadata_key_prefix); + if (write_finalized) { path_map.erase(path); auto metric = object_storage->getMetadataStorageMetrics().directory_map_size; CurrentMetrics::sub(metric, 1); - object_storage->removeObject(StoredObject(object_key.serialize(), path / PREFIX_PATH_FILE_NAME)); + object_storage->removeObject(StoredObject(metadata_object_key.serialize(), path / PREFIX_PATH_FILE_NAME)); } else if (write_created) - 
object_storage->removeObjectIfExists(StoredObject(object_key.serialize(), path / PREFIX_PATH_FILE_NAME)); + object_storage->removeObjectIfExists(StoredObject(metadata_object_key.serialize(), path / PREFIX_PATH_FILE_NAME)); } MetadataStorageFromPlainObjectStorageMoveDirectoryOperation::MetadataStorageFromPlainObjectStorageMoveDirectoryOperation( std::filesystem::path && path_from_, std::filesystem::path && path_to_, MetadataStorageFromPlainObjectStorage::PathMap & path_map_, - ObjectStoragePtr object_storage_) - : path_from(std::move(path_from_)), path_to(std::move(path_to_)), path_map(path_map_), object_storage(object_storage_) + ObjectStoragePtr object_storage_, + const std::string & metadata_key_prefix_) + : path_from(std::move(path_from_)) + , path_to(std::move(path_to_)) + , path_map(path_map_) + , object_storage(object_storage_) + , metadata_key_prefix(metadata_key_prefix_) { } @@ -98,26 +118,26 @@ std::unique_ptr MetadataStorageFromPlainObjectStorageMo if (path_map.contains(new_path)) throw Exception(ErrorCodes::FILE_ALREADY_EXISTS, "Metadata object for the new (destination) path '{}' already exists", new_path); - auto object_key = ObjectStorageKey::createAsRelative(expected_it->second, PREFIX_PATH_FILE_NAME); + auto metadata_object_key = createMetadataObjectKey(expected_it->second, metadata_key_prefix); - auto object = StoredObject(object_key.serialize(), expected_path / PREFIX_PATH_FILE_NAME); + auto metadata_object = StoredObject(metadata_object_key.serialize(), expected_path / PREFIX_PATH_FILE_NAME); if (validate_content) { std::string data; - auto read_buf = object_storage->readObject(object); + auto read_buf = object_storage->readObject(metadata_object); readStringUntilEOF(data, *read_buf); if (data != path_from) throw Exception( ErrorCodes::INCORRECT_DATA, "Incorrect data for object key {}, expected {}, got {}", - object_key.serialize(), + metadata_object_key.serialize(), expected_path, data); } auto write_buf = object_storage->writeObject( - object, + metadata_object, WriteMode::Rewrite, /* object_attributes */ std::nullopt, /*buf_size*/ DBMS_DEFAULT_BUFFER_SIZE, @@ -156,8 +176,11 @@ void MetadataStorageFromPlainObjectStorageMoveDirectoryOperation::undo(std::uniq } MetadataStorageFromPlainObjectStorageRemoveDirectoryOperation::MetadataStorageFromPlainObjectStorageRemoveDirectoryOperation( - std::filesystem::path && path_, MetadataStorageFromPlainObjectStorage::PathMap & path_map_, ObjectStoragePtr object_storage_) - : path(std::move(path_)), path_map(path_map_), object_storage(object_storage_) + std::filesystem::path && path_, + MetadataStorageFromPlainObjectStorage::PathMap & path_map_, + ObjectStoragePtr object_storage_, + const std::string & metadata_key_prefix_) + : path(std::move(path_)), path_map(path_map_), object_storage(object_storage_), metadata_key_prefix(metadata_key_prefix_) { } @@ -170,9 +193,9 @@ void MetadataStorageFromPlainObjectStorageRemoveDirectoryOperation::execute(std: LOG_TRACE(getLogger("MetadataStorageFromPlainObjectStorageRemoveDirectoryOperation"), "Removing directory '{}'", path); key_prefix = path_it->second; - auto object_key = ObjectStorageKey::createAsRelative(key_prefix, PREFIX_PATH_FILE_NAME); - auto object = StoredObject(object_key.serialize(), path / PREFIX_PATH_FILE_NAME); - object_storage->removeObject(object); + auto metadata_object_key = createMetadataObjectKey(key_prefix, metadata_key_prefix); + auto metadata_object = StoredObject(metadata_object_key.serialize(), path / PREFIX_PATH_FILE_NAME); + 
object_storage->removeObject(metadata_object); path_map.erase(path_it); auto metric = object_storage->getMetadataStorageMetrics().directory_map_size; @@ -189,10 +212,10 @@ void MetadataStorageFromPlainObjectStorageRemoveDirectoryOperation::undo(std::un if (!removed) return; - auto object_key = ObjectStorageKey::createAsRelative(key_prefix, PREFIX_PATH_FILE_NAME); - auto object = StoredObject(object_key.serialize(), path / PREFIX_PATH_FILE_NAME); + auto metadata_object_key = createMetadataObjectKey(key_prefix, metadata_key_prefix); + auto metadata_object = StoredObject(metadata_object_key.serialize(), path / PREFIX_PATH_FILE_NAME); auto buf = object_storage->writeObject( - object, + metadata_object, WriteMode::Rewrite, /* object_attributes */ std::nullopt, /* buf_size */ DBMS_DEFAULT_BUFFER_SIZE, diff --git a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.h b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.h index 4b196f787fd..e31e3cbb262 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.h +++ b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.h @@ -16,6 +16,7 @@ private: std::string key_prefix; MetadataStorageFromPlainObjectStorage::PathMap & path_map; ObjectStoragePtr object_storage; + const std::string metadata_key_prefix; bool write_created = false; bool write_finalized = false; @@ -26,7 +27,8 @@ public: std::filesystem::path && path_, std::string && key_prefix_, MetadataStorageFromPlainObjectStorage::PathMap & path_map_, - ObjectStoragePtr object_storage_); + ObjectStoragePtr object_storage_, + const std::string & metadata_key_prefix_); void execute(std::unique_lock & metadata_lock) override; void undo(std::unique_lock & metadata_lock) override; @@ -39,6 +41,7 @@ private: std::filesystem::path path_to; MetadataStorageFromPlainObjectStorage::PathMap & path_map; ObjectStoragePtr object_storage; + const std::string metadata_key_prefix; bool write_created = false; bool write_finalized = false; @@ -51,7 +54,8 @@ public: std::filesystem::path && path_from_, std::filesystem::path && path_to_, MetadataStorageFromPlainObjectStorage::PathMap & path_map_, - ObjectStoragePtr object_storage_); + ObjectStoragePtr object_storage_, + const std::string & metadata_key_prefix_); void execute(std::unique_lock & metadata_lock) override; @@ -65,13 +69,17 @@ private: MetadataStorageFromPlainObjectStorage::PathMap & path_map; ObjectStoragePtr object_storage; + const std::string metadata_key_prefix; std::string key_prefix; bool removed = false; public: MetadataStorageFromPlainObjectStorageRemoveDirectoryOperation( - std::filesystem::path && path_, MetadataStorageFromPlainObjectStorage::PathMap & path_map_, ObjectStoragePtr object_storage_); + std::filesystem::path && path_, + MetadataStorageFromPlainObjectStorage::PathMap & path_map_, + ObjectStoragePtr object_storage_, + const std::string & metadata_key_prefix_); void execute(std::unique_lock & metadata_lock) override; void undo(std::unique_lock & metadata_lock) override; diff --git a/src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.cpp b/src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.cpp index 7718fba9c28..f3d00a928e3 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.cpp +++ b/src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.cpp @@ -1,6 +1,7 @@ #include #include +#include #include #include #include @@ -21,8 +22,22 @@ namespace { constexpr 
auto PREFIX_PATH_FILE_NAME = "prefix.path"; +constexpr auto METADATA_PATH_TOKEN = "__meta/"; -MetadataStorageFromPlainObjectStorage::PathMap loadPathPrefixMap(const std::string & root, ObjectStoragePtr object_storage) +/// Use a separate layout for metadata iff: +/// 1. The disk endpoint does not contain objects, OR +/// 2. The metadata is already stored behind a separate endpoint. +/// Otherwise, store metadata along with regular data for backward compatibility. +std::string getMetadataKeyPrefix(ObjectStoragePtr object_storage) +{ + const auto common_key_prefix = std::filesystem::path(object_storage->getCommonKeyPrefix()); + const auto metadata_key_prefix = std::filesystem::path(common_key_prefix) / METADATA_PATH_TOKEN; + return !object_storage->existsOrHasAnyChild(metadata_key_prefix / "") && object_storage->existsOrHasAnyChild(common_key_prefix / "") + ? common_key_prefix + : metadata_key_prefix; +} + +MetadataStorageFromPlainObjectStorage::PathMap loadPathPrefixMap(const std::string & metadata_key_prefix, ObjectStoragePtr object_storage) { MetadataStorageFromPlainObjectStorage::PathMap result; @@ -39,16 +54,16 @@ MetadataStorageFromPlainObjectStorage::PathMap loadPathPrefixMap(const std::stri LOG_DEBUG(log, "Loading metadata"); size_t num_files = 0; - for (auto iterator = object_storage->iterate(root, 0); iterator->isValid(); iterator->next()) + for (auto iterator = object_storage->iterate(metadata_key_prefix, 0); iterator->isValid(); iterator->next()) { ++num_files; auto file = iterator->current(); String path = file->getPath(); - auto remote_path = std::filesystem::path(path); - if (remote_path.filename() != PREFIX_PATH_FILE_NAME) + auto remote_metadata_path = std::filesystem::path(path); + if (remote_metadata_path.filename() != PREFIX_PATH_FILE_NAME) continue; - runner([remote_path, path, &object_storage, &result, &mutex, &log, &settings] + runner([remote_metadata_path, path, &object_storage, &result, &mutex, &log, &settings, &metadata_key_prefix] { setThreadName("PlainRWMetaLoad"); @@ -75,7 +90,10 @@ MetadataStorageFromPlainObjectStorage::PathMap loadPathPrefixMap(const std::stri throw; } - chassert(remote_path.has_parent_path()); + chassert(remote_metadata_path.has_parent_path()); + chassert(remote_metadata_path.string().starts_with(metadata_key_prefix)); + auto suffix = remote_metadata_path.string().substr(metadata_key_prefix.size()); + auto remote_path = std::filesystem::path(std::move(suffix)); std::pair res; { std::lock_guard lock(mutex); @@ -103,17 +121,17 @@ MetadataStorageFromPlainObjectStorage::PathMap loadPathPrefixMap(const std::stri return result; } -std::vector getDirectChildrenOnRewritableDisk( +void getDirectChildrenOnRewritableDisk( const std::string & storage_key, + const std::string & storage_key_perfix, const RelativePathsWithMetadata & remote_paths, const std::string & local_path, const MetadataStorageFromPlainObjectStorage::PathMap & local_path_prefixes, - SharedMutex & shared_mutex) + SharedMutex & shared_mutex, + std::unordered_set & result) { using PathMap = MetadataStorageFromPlainObjectStorage::PathMap; - std::unordered_set duplicates_filter; - /// Map remote paths into local subdirectories. std::unordered_map remote_to_local_subdir; @@ -149,22 +167,21 @@ std::vector getDirectChildrenOnRewritableDisk( /// File names. auto filename = path.substr(child_pos); if (!skip_list.contains(filename)) - duplicates_filter.emplace(std::move(filename)); + result.emplace(std::move(filename)); } else { /// Subdirectories. 
- auto it = remote_to_local_subdir.find(path.substr(0, slash_pos)); + chassert(path.find(storage_key_perfix) == 0); + auto it = remote_to_local_subdir.find(path.substr(storage_key_perfix.size(), slash_pos - storage_key_perfix.size())); /// Mapped subdirectories. if (it != remote_to_local_subdir.end()) - duplicates_filter.emplace(it->second); + result.emplace(it->second); /// The remote subdirectory name is the same as the local subdirectory. else - duplicates_filter.emplace(path.substr(child_pos, slash_pos - child_pos)); + result.emplace(path.substr(child_pos, slash_pos - child_pos)); } } - - return std::vector(std::make_move_iterator(duplicates_filter.begin()), std::make_move_iterator(duplicates_filter.end())); } } @@ -172,7 +189,8 @@ std::vector getDirectChildrenOnRewritableDisk( MetadataStorageFromPlainRewritableObjectStorage::MetadataStorageFromPlainRewritableObjectStorage( ObjectStoragePtr object_storage_, String storage_path_prefix_) : MetadataStorageFromPlainObjectStorage(object_storage_, storage_path_prefix_) - , path_map(std::make_shared(loadPathPrefixMap(object_storage->getCommonKeyPrefix(), object_storage))) + , metadata_key_prefix(DB::getMetadataKeyPrefix(object_storage)) + , path_map(std::make_shared(loadPathPrefixMap(metadata_key_prefix, object_storage))) { if (object_storage->isWriteOnce()) throw Exception( @@ -190,10 +208,71 @@ MetadataStorageFromPlainRewritableObjectStorage::~MetadataStorageFromPlainRewrit CurrentMetrics::sub(metric, path_map->size()); } -std::vector MetadataStorageFromPlainRewritableObjectStorage::getDirectChildrenOnDisk( - const std::string & storage_key, const RelativePathsWithMetadata & remote_paths, const std::string & local_path) const +bool MetadataStorageFromPlainRewritableObjectStorage::exists(const std::string & path) const { - return getDirectChildrenOnRewritableDisk(storage_key, remote_paths, local_path, *getPathMap(), metadata_mutex); + if (MetadataStorageFromPlainObjectStorage::exists(path)) + return true; + + if (getMetadataKeyPrefix() != object_storage->getCommonKeyPrefix()) + { + auto key_prefix = object_storage->generateObjectKeyForPath(path).serialize(); + chassert(key_prefix.starts_with(object_storage->getCommonKeyPrefix())); + auto metadata_key = std::filesystem::path(getMetadataKeyPrefix()) / key_prefix.substr(object_storage->getCommonKeyPrefix().size()); + return object_storage->existsOrHasAnyChild(metadata_key); + } + + return false; +} + +bool MetadataStorageFromPlainRewritableObjectStorage::isDirectory(const std::string & path) const +{ + if (getMetadataKeyPrefix() != object_storage->getCommonKeyPrefix()) + { + auto directory = std::filesystem::path(object_storage->generateObjectKeyForPath(path).serialize()) / ""; + chassert(directory.string().starts_with(object_storage->getCommonKeyPrefix())); + auto metadata_key + = std::filesystem::path(getMetadataKeyPrefix()) / directory.string().substr(object_storage->getCommonKeyPrefix().size()); + return object_storage->existsOrHasAnyChild(metadata_key); + } + else + return MetadataStorageFromPlainObjectStorage::isDirectory(path); +} + +std::vector MetadataStorageFromPlainRewritableObjectStorage::listDirectory(const std::string & path) const +{ + auto key_prefix = object_storage->generateObjectKeyForPath(path).serialize(); + + RelativePathsWithMetadata files; + std::string abs_key = key_prefix; + if (!abs_key.ends_with('/')) + abs_key += '/'; + + object_storage->listObjects(abs_key, files, 0); + + std::unordered_set directories; + getDirectChildrenOnDisk(abs_key, 
object_storage->getCommonKeyPrefix(), files, path, directories); + /// List empty directories that are identified by the `prefix.path` metadata files. This is required to, e.g., remove + /// metadata along with regular files. + if (object_storage->getCommonKeyPrefix() != getMetadataKeyPrefix()) + { + chassert(abs_key.starts_with(object_storage->getCommonKeyPrefix())); + auto metadata_key = std::filesystem::path(getMetadataKeyPrefix()) / abs_key.substr(object_storage->getCommonKeyPrefix().size()); + RelativePathsWithMetadata metadata_files; + object_storage->listObjects(metadata_key, metadata_files, 0); + getDirectChildrenOnDisk(metadata_key, getMetadataKeyPrefix(), metadata_files, path, directories); + } + + return std::vector(std::make_move_iterator(directories.begin()), std::make_move_iterator(directories.end())); +} + +void MetadataStorageFromPlainRewritableObjectStorage::getDirectChildrenOnDisk( + const std::string & storage_key, + const std::string & storage_key_perfix, + const RelativePathsWithMetadata & remote_paths, + const std::string & local_path, + std::unordered_set & result) const +{ + getDirectChildrenOnRewritableDisk(storage_key, storage_key_perfix, remote_paths, local_path, *getPathMap(), metadata_mutex, result); } } diff --git a/src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.h b/src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.h index a5394b9428d..71153cbdc25 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.h +++ b/src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.h @@ -3,6 +3,7 @@ #include #include +#include namespace DB @@ -11,6 +12,7 @@ namespace DB class MetadataStorageFromPlainRewritableObjectStorage final : public MetadataStorageFromPlainObjectStorage { private: + const std::string metadata_key_prefix; std::shared_ptr path_map; public: @@ -18,11 +20,20 @@ public: ~MetadataStorageFromPlainRewritableObjectStorage() override; MetadataStorageType getType() const override { return MetadataStorageType::PlainRewritable; } + bool exists(const std::string & path) const override; + bool isDirectory(const std::string & path) const override; + std::vector listDirectory(const std::string & path) const override; + protected: + std::string getMetadataKeyPrefix() const override { return metadata_key_prefix; } std::shared_ptr getPathMap() const override { return path_map; } - std::vector getDirectChildrenOnDisk( - const std::string & storage_key, const RelativePathsWithMetadata & remote_paths, const std::string & local_path) const override; + void getDirectChildrenOnDisk( + const std::string & storage_key, + const std::string & storage_key_perfix, + const RelativePathsWithMetadata & remote_paths, + const std::string & local_path, + std::unordered_set & result) const override; }; } diff --git a/tests/integration/test_s3_plain_rewritable/test.py b/tests/integration/test_s3_plain_rewritable/test.py index 4b1aaafc814..020e170eb48 100644 --- a/tests/integration/test_s3_plain_rewritable/test.py +++ b/tests/integration/test_s3_plain_rewritable/test.py @@ -139,6 +139,19 @@ def test(storage_policy): == insert_values_arr[i] ) + metadata_it = cluster.minio_client.list_objects( + cluster.minio_bucket, "data/", recursive=True + ) + metadata_count = 0 + for obj in list(metadata_it): + if "/__meta/" in obj.object_name: + assert obj.object_name.endswith("/prefix.path") + metadata_count += 1 + else: + assert not obj.object_name.endswith("/prefix.path") + + assert metadata_count > 0 + for i in 
range(NUM_WORKERS): node = cluster.instances[f"node{i + 1}"] node.query("DROP TABLE IF EXISTS test SYNC") From 98ad45ba960de4cc29ee794e2eeccf9fe6f8e0a8 Mon Sep 17 00:00:00 2001 From: Julia Kartseva Date: Fri, 28 Jun 2024 00:58:20 +0000 Subject: [PATCH 570/661] Add prefix_path parameter to object key generator --- src/Common/ObjectStorageKeyGenerator.cpp | 11 ++++++++--- src/Common/ObjectStorageKeyGenerator.h | 3 ++- .../AzureBlobStorage/AzureObjectStorage.cpp | 4 +++- .../AzureBlobStorage/AzureObjectStorage.h | 2 +- .../Cached/CachedObjectStorage.cpp | 10 ++++++---- .../ObjectStorages/Cached/CachedObjectStorage.h | 5 +++-- .../CommonPathPrefixKeyGenerator.cpp | 3 ++- .../CommonPathPrefixKeyGenerator.h | 3 ++- .../DiskObjectStorageTransaction.cpp | 6 +++--- .../ObjectStorages/HDFS/HDFSObjectStorage.cpp | 6 ++++-- .../ObjectStorages/HDFS/HDFSObjectStorage.h | 2 +- src/Disks/ObjectStorages/IObjectStorage.h | 5 +++-- .../ObjectStorages/Local/LocalObjectStorage.cpp | 14 ++++++++------ .../ObjectStorages/Local/LocalObjectStorage.h | 2 +- .../MetadataStorageFromPlainObjectStorage.cpp | 14 +++++++------- ...aStorageFromPlainRewritableObjectStorage.cpp | 6 +++--- src/Disks/ObjectStorages/PlainObjectStorage.h | 2 +- .../PlainRewritableObjectStorage.h | 17 +++++++++++------ src/Disks/ObjectStorages/S3/DiskS3Utils.cpp | 2 +- src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp | 4 ++-- src/Disks/ObjectStorages/S3/S3ObjectStorage.h | 2 +- src/Disks/ObjectStorages/Web/WebObjectStorage.h | 2 +- 22 files changed, 74 insertions(+), 51 deletions(-) diff --git a/src/Common/ObjectStorageKeyGenerator.cpp b/src/Common/ObjectStorageKeyGenerator.cpp index e9212c3f04d..3e7bf3116bd 100644 --- a/src/Common/ObjectStorageKeyGenerator.cpp +++ b/src/Common/ObjectStorageKeyGenerator.cpp @@ -3,6 +3,7 @@ #include #include +#include #include @@ -14,7 +15,10 @@ public: , re_gen(key_template) { } - DB::ObjectStorageKey generate(const String &, bool) const override { return DB::ObjectStorageKey::createAsAbsolute(re_gen.generate()); } + DB::ObjectStorageKey generate(const String &, bool /* is_directory */, const std::optional & /* key_prefix */) const override + { + return DB::ObjectStorageKey::createAsAbsolute(re_gen.generate()); + } private: String key_template; @@ -29,7 +33,7 @@ public: : key_prefix(std::move(key_prefix_)) {} - DB::ObjectStorageKey generate(const String &, bool) const override + DB::ObjectStorageKey generate(const String &, bool /* is_directory */, const std::optional & /* key_prefix */) const override { /// Path to store the new S3 object. @@ -60,7 +64,8 @@ public: : key_prefix(std::move(key_prefix_)) {} - DB::ObjectStorageKey generate(const String & path, bool) const override + DB::ObjectStorageKey + generate(const String & path, bool /* is_directory */, const std::optional & /* key_prefix */) const override { return DB::ObjectStorageKey::createAsRelative(key_prefix, path); } diff --git a/src/Common/ObjectStorageKeyGenerator.h b/src/Common/ObjectStorageKeyGenerator.h index 11da039b33b..12aeec1714d 100644 --- a/src/Common/ObjectStorageKeyGenerator.h +++ b/src/Common/ObjectStorageKeyGenerator.h @@ -11,7 +11,8 @@ class IObjectStorageKeysGenerator public: virtual ~IObjectStorageKeysGenerator() = default; - virtual ObjectStorageKey generate(const String & path, bool is_directory) const = 0; + /// Generates an object storage key based on a path in the virtual filesystem. 
+ virtual ObjectStorageKey generate(const String & path, bool is_directory, const std::optional & key_prefix) const = 0; }; using ObjectStorageKeysGeneratorPtr = std::shared_ptr; diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp index bc16955143b..0d92561d142 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp @@ -1,3 +1,4 @@ +#include #include #include "Common/Exception.h" @@ -117,7 +118,8 @@ AzureObjectStorage::AzureObjectStorage( { } -ObjectStorageKey AzureObjectStorage::generateObjectKeyForPath(const std::string & /* path */) const +ObjectStorageKey +AzureObjectStorage::generateObjectKeyForPath(const std::string & /* path */, const std::optional & /* key_prefix */) const { return ObjectStorageKey::createAsRelative(getRandomASCIIString(32)); } diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h index 2c7ce5e18dc..bc90b05e64d 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h @@ -101,7 +101,7 @@ public: const std::string & config_prefix, ContextPtr context) override; - ObjectStorageKey generateObjectKeyForPath(const std::string & path) const override; + ObjectStorageKey generateObjectKeyForPath(const std::string & path, const std::optional & key_prefix) const override; bool isRemote() const override { return true; } diff --git a/src/Disks/ObjectStorages/Cached/CachedObjectStorage.cpp b/src/Disks/ObjectStorages/Cached/CachedObjectStorage.cpp index a3b6e25e8ea..fb817005399 100644 --- a/src/Disks/ObjectStorages/Cached/CachedObjectStorage.cpp +++ b/src/Disks/ObjectStorages/Cached/CachedObjectStorage.cpp @@ -34,14 +34,16 @@ FileCache::Key CachedObjectStorage::getCacheKey(const std::string & path) const return cache->createKeyForPath(path); } -ObjectStorageKey CachedObjectStorage::generateObjectKeyForPath(const std::string & path) const +ObjectStorageKey +CachedObjectStorage::generateObjectKeyForPath(const std::string & path, const std::optional & key_prefix) const { - return object_storage->generateObjectKeyForPath(path); + return object_storage->generateObjectKeyForPath(path, key_prefix); } -ObjectStorageKey CachedObjectStorage::generateObjectKeyPrefixForDirectoryPath(const std::string & path) const +ObjectStorageKey +CachedObjectStorage::generateObjectKeyPrefixForDirectoryPath(const std::string & path, const std::optional & key_prefix) const { - return object_storage->generateObjectKeyPrefixForDirectoryPath(path); + return object_storage->generateObjectKeyPrefixForDirectoryPath(path, key_prefix); } ReadSettings CachedObjectStorage::patchSettings(const ReadSettings & read_settings) const diff --git a/src/Disks/ObjectStorages/Cached/CachedObjectStorage.h b/src/Disks/ObjectStorages/Cached/CachedObjectStorage.h index 93ef2659cbb..efcdbfebabf 100644 --- a/src/Disks/ObjectStorages/Cached/CachedObjectStorage.h +++ b/src/Disks/ObjectStorages/Cached/CachedObjectStorage.h @@ -98,9 +98,10 @@ public: const std::string & getCacheName() const override { return cache_config_name; } - ObjectStorageKey generateObjectKeyForPath(const std::string & path) const override; + ObjectStorageKey generateObjectKeyForPath(const std::string & path, const std::optional & key_prefix) const override; - ObjectStorageKey 
generateObjectKeyPrefixForDirectoryPath(const std::string & path) const override; + ObjectStorageKey + generateObjectKeyPrefixForDirectoryPath(const std::string & path, const std::optional & key_prefix) const override; void setKeysGenerator(ObjectStorageKeysGeneratorPtr gen) override { object_storage->setKeysGenerator(gen); } diff --git a/src/Disks/ObjectStorages/CommonPathPrefixKeyGenerator.cpp b/src/Disks/ObjectStorages/CommonPathPrefixKeyGenerator.cpp index 2a06d56e5c7..0a4426e8e66 100644 --- a/src/Disks/ObjectStorages/CommonPathPrefixKeyGenerator.cpp +++ b/src/Disks/ObjectStorages/CommonPathPrefixKeyGenerator.cpp @@ -15,7 +15,8 @@ CommonPathPrefixKeyGenerator::CommonPathPrefixKeyGenerator( { } -ObjectStorageKey CommonPathPrefixKeyGenerator::generate(const String & path, bool is_directory) const +ObjectStorageKey +CommonPathPrefixKeyGenerator::generate(const String & path, bool is_directory, const std::optional & /* key_prefix */) const { const auto & [object_key_prefix, suffix_parts] = getLongestObjectKeyPrefix(path); diff --git a/src/Disks/ObjectStorages/CommonPathPrefixKeyGenerator.h b/src/Disks/ObjectStorages/CommonPathPrefixKeyGenerator.h index fb1140de908..08495738505 100644 --- a/src/Disks/ObjectStorages/CommonPathPrefixKeyGenerator.h +++ b/src/Disks/ObjectStorages/CommonPathPrefixKeyGenerator.h @@ -5,6 +5,7 @@ #include #include +#include namespace DB { @@ -26,7 +27,7 @@ public: explicit CommonPathPrefixKeyGenerator(String key_prefix_, SharedMutex & shared_mutex_, std::weak_ptr path_map_); - ObjectStorageKey generate(const String & path, bool is_directory) const override; + ObjectStorageKey generate(const String & path, bool is_directory, const std::optional & key_prefix) const override; private: /// Longest key prefix and unresolved parts of the source path. diff --git a/src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp b/src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp index b5805f6d23a..880911b9958 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp +++ b/src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp @@ -537,7 +537,7 @@ struct CopyFileObjectStorageOperation final : public IDiskObjectStorageOperation for (const auto & object_from : source_blobs) { - auto object_key = destination_object_storage.generateObjectKeyForPath(to_path); + auto object_key = destination_object_storage.generateObjectKeyForPath(to_path, std::nullopt /* key_prefix */); auto object_to = StoredObject(object_key.serialize()); object_storage.copyObjectToAnotherObjectStorage(object_from, object_to,read_settings,write_settings, destination_object_storage); @@ -738,7 +738,7 @@ std::unique_ptr DiskObjectStorageTransaction::writeFile const WriteSettings & settings, bool autocommit) { - auto object_key = object_storage.generateObjectKeyForPath(path); + auto object_key = object_storage.generateObjectKeyForPath(path, std::nullopt /* key_prefix */); std::optional object_attributes; if (metadata_helper) @@ -835,7 +835,7 @@ void DiskObjectStorageTransaction::writeFileUsingBlobWritingFunction( const String & path, WriteMode mode, WriteBlobFunction && write_blob_function) { /// This function is a simplified and adapted version of DiskObjectStorageTransaction::writeFile(). 
- auto object_key = object_storage.generateObjectKeyForPath(path); + auto object_key = object_storage.generateObjectKeyForPath(path, std::nullopt /* key_prefix */); std::optional object_attributes; if (metadata_helper) diff --git a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp index dcb2af9d4d3..3ce2a0f4903 100644 --- a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp +++ b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp @@ -4,8 +4,9 @@ #include #include -#include +#include #include +#include #include #include @@ -53,7 +54,8 @@ std::string HDFSObjectStorage::extractObjectKeyFromURL(const StoredObject & obje return path; } -ObjectStorageKey HDFSObjectStorage::generateObjectKeyForPath(const std::string & /* path */) const +ObjectStorageKey +HDFSObjectStorage::generateObjectKeyForPath(const std::string & /* path */, const std::optional & /* key_prefix */) const { initializeHDFSFS(); /// what ever data_source_description.description value is, consider that key as relative key diff --git a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.h b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.h index 8aae90d0721..0cb31eb8b8b 100644 --- a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.h +++ b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.h @@ -111,7 +111,7 @@ public: const std::string & config_prefix, ContextPtr context) override; - ObjectStorageKey generateObjectKeyForPath(const std::string & path) const override; + ObjectStorageKey generateObjectKeyForPath(const std::string & path, const std::optional & key_prefix) const override; bool isRemote() const override { return true; } diff --git a/src/Disks/ObjectStorages/IObjectStorage.h b/src/Disks/ObjectStorages/IObjectStorage.h index ceea4d5a2bb..529c79790fd 100644 --- a/src/Disks/ObjectStorages/IObjectStorage.h +++ b/src/Disks/ObjectStorages/IObjectStorage.h @@ -232,10 +232,11 @@ public: /// Generate blob name for passed absolute local path. /// Path can be generated either independently or based on `path`. - virtual ObjectStorageKey generateObjectKeyForPath(const std::string & path) const = 0; + virtual ObjectStorageKey generateObjectKeyForPath(const std::string & path, const std::optional & key_prefix) const = 0; /// Object key prefix for local paths in the directory 'path'. 
- virtual ObjectStorageKey generateObjectKeyPrefixForDirectoryPath(const std::string & /* path */) const + virtual ObjectStorageKey + generateObjectKeyPrefixForDirectoryPath(const std::string & /* path */, const std::optional & /* key_prefix */) const { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method 'generateObjectKeyPrefixForDirectoryPath' is not implemented"); } diff --git a/src/Disks/ObjectStorages/Local/LocalObjectStorage.cpp b/src/Disks/ObjectStorages/Local/LocalObjectStorage.cpp index a247d86ddce..20ef135cdf7 100644 --- a/src/Disks/ObjectStorages/Local/LocalObjectStorage.cpp +++ b/src/Disks/ObjectStorages/Local/LocalObjectStorage.cpp @@ -1,15 +1,16 @@ #include -#include -#include -#include +#include +#include +#include #include #include -#include #include #include +#include +#include #include -#include +#include namespace fs = std::filesystem; @@ -222,7 +223,8 @@ std::unique_ptr LocalObjectStorage::cloneObjectStorage( throw Exception(ErrorCodes::NOT_IMPLEMENTED, "cloneObjectStorage() is not implemented for LocalObjectStorage"); } -ObjectStorageKey LocalObjectStorage::generateObjectKeyForPath(const std::string & /* path */) const +ObjectStorageKey +LocalObjectStorage::generateObjectKeyForPath(const std::string & /* path */, const std::optional & /* key_prefix */) const { constexpr size_t key_name_total_size = 32; return ObjectStorageKey::createAsRelative(key_prefix, getRandomASCIIString(key_name_total_size)); diff --git a/src/Disks/ObjectStorages/Local/LocalObjectStorage.h b/src/Disks/ObjectStorages/Local/LocalObjectStorage.h index 371cd37f8b2..564d49bf876 100644 --- a/src/Disks/ObjectStorages/Local/LocalObjectStorage.h +++ b/src/Disks/ObjectStorages/Local/LocalObjectStorage.h @@ -81,7 +81,7 @@ public: const std::string & config_prefix, ContextPtr context) override; - ObjectStorageKey generateObjectKeyForPath(const std::string & path) const override; + ObjectStorageKey generateObjectKeyForPath(const std::string & path, const std::optional & key_prefix) const override; bool isRemote() const override { return false; } diff --git a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp index 3da190c7256..589b18abca8 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp +++ b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp @@ -42,7 +42,7 @@ bool MetadataStorageFromPlainObjectStorage::exists(const std::string & path) con { /// NOTE: exists() cannot be used here since it works only for existing /// key, and does not work for some intermediate path. 
- auto object_key = object_storage->generateObjectKeyForPath(path); + auto object_key = object_storage->generateObjectKeyForPath(path, std::nullopt /* key_prefix */); return object_storage->existsOrHasAnyChild(object_key.serialize()); } @@ -54,7 +54,7 @@ bool MetadataStorageFromPlainObjectStorage::isFile(const std::string & path) con bool MetadataStorageFromPlainObjectStorage::isDirectory(const std::string & path) const { - auto key_prefix = object_storage->generateObjectKeyForPath(path).serialize(); + auto key_prefix = object_storage->generateObjectKeyForPath(path, std::nullopt /* key_prefix */).serialize(); auto directory = std::filesystem::path(std::move(key_prefix)) / ""; return object_storage->existsOrHasAnyChild(directory); @@ -62,7 +62,7 @@ bool MetadataStorageFromPlainObjectStorage::isDirectory(const std::string & path uint64_t MetadataStorageFromPlainObjectStorage::getFileSize(const String & path) const { - auto object_key = object_storage->generateObjectKeyForPath(path); + auto object_key = object_storage->generateObjectKeyForPath(path, std::nullopt /* key_prefix */); auto metadata = object_storage->tryGetObjectMetadata(object_key.serialize()); if (metadata) return metadata->size_bytes; @@ -71,7 +71,7 @@ uint64_t MetadataStorageFromPlainObjectStorage::getFileSize(const String & path) std::vector MetadataStorageFromPlainObjectStorage::listDirectory(const std::string & path) const { - auto key_prefix = object_storage->generateObjectKeyForPath(path).serialize(); + auto key_prefix = object_storage->generateObjectKeyForPath(path, std::nullopt /* key_prefix */).serialize(); RelativePathsWithMetadata files; std::string abs_key = key_prefix; @@ -98,7 +98,7 @@ DirectoryIteratorPtr MetadataStorageFromPlainObjectStorage::iterateDirectory(con StoredObjects MetadataStorageFromPlainObjectStorage::getStorageObjects(const std::string & path) const { size_t object_size = getFileSize(path); - auto object_key = object_storage->generateObjectKeyForPath(path); + auto object_key = object_storage->generateObjectKeyForPath(path, std::nullopt /* key_prefix */); return {StoredObject(object_key.serialize(), path, object_size)}; } @@ -130,7 +130,7 @@ const IMetadataStorage & MetadataStorageFromPlainObjectStorageTransaction::getSt void MetadataStorageFromPlainObjectStorageTransaction::unlinkFile(const std::string & path) { - auto object_key = metadata_storage.object_storage->generateObjectKeyForPath(path); + auto object_key = metadata_storage.object_storage->generateObjectKeyForPath(path, std::nullopt /* key_prefix */); auto object = StoredObject(object_key.serialize()); metadata_storage.object_storage->removeObject(object); } @@ -155,7 +155,7 @@ void MetadataStorageFromPlainObjectStorageTransaction::createDirectory(const std return; auto normalized_path = normalizeDirectoryPath(path); - auto key_prefix = object_storage->generateObjectKeyPrefixForDirectoryPath(normalized_path).serialize(); + auto key_prefix = object_storage->generateObjectKeyPrefixForDirectoryPath(normalized_path, std::nullopt /* key_prefix */).serialize(); chassert(key_prefix.starts_with(object_storage->getCommonKeyPrefix())); auto op = std::make_unique( std::move(normalized_path), diff --git a/src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.cpp b/src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.cpp index f3d00a928e3..de65cd5c233 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.cpp +++ 
b/src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.cpp @@ -215,7 +215,7 @@ bool MetadataStorageFromPlainRewritableObjectStorage::exists(const std::string & if (getMetadataKeyPrefix() != object_storage->getCommonKeyPrefix()) { - auto key_prefix = object_storage->generateObjectKeyForPath(path).serialize(); + auto key_prefix = object_storage->generateObjectKeyForPath(path, std::nullopt /* key_prefix */).serialize(); chassert(key_prefix.starts_with(object_storage->getCommonKeyPrefix())); auto metadata_key = std::filesystem::path(getMetadataKeyPrefix()) / key_prefix.substr(object_storage->getCommonKeyPrefix().size()); return object_storage->existsOrHasAnyChild(metadata_key); @@ -228,7 +228,7 @@ bool MetadataStorageFromPlainRewritableObjectStorage::isDirectory(const std::str { if (getMetadataKeyPrefix() != object_storage->getCommonKeyPrefix()) { - auto directory = std::filesystem::path(object_storage->generateObjectKeyForPath(path).serialize()) / ""; + auto directory = std::filesystem::path(object_storage->generateObjectKeyForPath(path, std::nullopt /* key_prefix */).serialize()) / ""; chassert(directory.string().starts_with(object_storage->getCommonKeyPrefix())); auto metadata_key = std::filesystem::path(getMetadataKeyPrefix()) / directory.string().substr(object_storage->getCommonKeyPrefix().size()); @@ -240,7 +240,7 @@ bool MetadataStorageFromPlainRewritableObjectStorage::isDirectory(const std::str std::vector MetadataStorageFromPlainRewritableObjectStorage::listDirectory(const std::string & path) const { - auto key_prefix = object_storage->generateObjectKeyForPath(path).serialize(); + auto key_prefix = object_storage->generateObjectKeyForPath(path, std::nullopt /* key_prefix */).serialize(); RelativePathsWithMetadata files; std::string abs_key = key_prefix; diff --git a/src/Disks/ObjectStorages/PlainObjectStorage.h b/src/Disks/ObjectStorages/PlainObjectStorage.h index e0907d0b4d8..805b3436fce 100644 --- a/src/Disks/ObjectStorages/PlainObjectStorage.h +++ b/src/Disks/ObjectStorages/PlainObjectStorage.h @@ -26,7 +26,7 @@ public: bool isPlain() const override { return true; } - ObjectStorageKey generateObjectKeyForPath(const std::string & path) const override + ObjectStorageKey generateObjectKeyForPath(const std::string & path, const std::optional & /* key_prefix */) const override { return ObjectStorageKey::createAsRelative(BaseObjectStorage::getCommonKeyPrefix(), path); } diff --git a/src/Disks/ObjectStorages/PlainRewritableObjectStorage.h b/src/Disks/ObjectStorages/PlainRewritableObjectStorage.h index 5f000afe625..dcea5964fc5 100644 --- a/src/Disks/ObjectStorages/PlainRewritableObjectStorage.h +++ b/src/Disks/ObjectStorages/PlainRewritableObjectStorage.h @@ -1,5 +1,7 @@ #pragma once +#include +#include #include #include #include "CommonPathPrefixKeyGenerator.h" @@ -33,9 +35,10 @@ public: bool isPlain() const override { return true; } - ObjectStorageKey generateObjectKeyForPath(const std::string & path) const override; + ObjectStorageKey generateObjectKeyForPath(const std::string & path, const std::optional & key_prefix) const override; - ObjectStorageKey generateObjectKeyPrefixForDirectoryPath(const std::string & path) const override; + ObjectStorageKey + generateObjectKeyPrefixForDirectoryPath(const std::string & path, const std::optional & key_prefix) const override; void setKeysGenerator(ObjectStorageKeysGeneratorPtr gen) override { key_generator = gen; } @@ -46,20 +49,22 @@ private: template -ObjectStorageKey 
PlainRewritableObjectStorage::generateObjectKeyForPath(const std::string & path) const +ObjectStorageKey PlainRewritableObjectStorage::generateObjectKeyForPath( + const std::string & path, const std::optional & key_prefix) const { if (!key_generator) throw Exception(ErrorCodes::LOGICAL_ERROR, "Key generator is not set"); - return key_generator->generate(path, /* is_directory */ false); + return key_generator->generate(path, /* is_directory */ false, key_prefix); } template -ObjectStorageKey PlainRewritableObjectStorage::generateObjectKeyPrefixForDirectoryPath(const std::string & path) const +ObjectStorageKey PlainRewritableObjectStorage::generateObjectKeyPrefixForDirectoryPath( + const std::string & path, const std::optional & key_prefix) const { if (!key_generator) throw Exception(ErrorCodes::LOGICAL_ERROR, "Key generator is not set"); - return key_generator->generate(path, /* is_directory */ true); + return key_generator->generate(path, /* is_directory */ true, key_prefix); } } diff --git a/src/Disks/ObjectStorages/S3/DiskS3Utils.cpp b/src/Disks/ObjectStorages/S3/DiskS3Utils.cpp index 63e7ebb00c5..b20a2940e47 100644 --- a/src/Disks/ObjectStorages/S3/DiskS3Utils.cpp +++ b/src/Disks/ObjectStorages/S3/DiskS3Utils.cpp @@ -79,7 +79,7 @@ bool checkBatchRemove(S3ObjectStorage & storage) /// We are using generateObjectKeyForPath() which returns random object key. /// That generated key is placed in a right directory where we should have write access. const String path = fmt::format("clickhouse_remove_objects_capability_{}", getServerUUID()); - const auto key = storage.generateObjectKeyForPath(path); + const auto key = storage.generateObjectKeyForPath(path, {} /* key_prefix */); StoredObject object(key.serialize(), path); try { diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp index a6672e14e10..3c4b4d76bf5 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp @@ -624,12 +624,12 @@ std::unique_ptr S3ObjectStorage::cloneObjectStorage( std::move(new_client), std::move(new_s3_settings), new_uri, s3_capabilities, key_generator, disk_name); } -ObjectStorageKey S3ObjectStorage::generateObjectKeyForPath(const std::string & path) const +ObjectStorageKey S3ObjectStorage::generateObjectKeyForPath(const std::string & path, const std::optional & key_prefix) const { if (!key_generator) throw Exception(ErrorCodes::LOGICAL_ERROR, "Key generator is not set"); - return key_generator->generate(path, /* is_directory */ false); + return key_generator->generate(path, /* is_directory */ false, key_prefix); } std::shared_ptr S3ObjectStorage::getS3StorageClient() diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.h b/src/Disks/ObjectStorages/S3/S3ObjectStorage.h index cbe004bc298..d786a6b37f3 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.h +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.h @@ -164,7 +164,7 @@ public: bool supportParallelWrite() const override { return true; } - ObjectStorageKey generateObjectKeyForPath(const std::string & path) const override; + ObjectStorageKey generateObjectKeyForPath(const std::string & path, const std::optional & key_prefix) const override; bool isReadOnly() const override { return s3_settings.get()->read_only; } diff --git a/src/Disks/ObjectStorages/Web/WebObjectStorage.h b/src/Disks/ObjectStorages/Web/WebObjectStorage.h index 9ca2950dae0..ab357d6f50d 100644 --- a/src/Disks/ObjectStorages/Web/WebObjectStorage.h +++ 
b/src/Disks/ObjectStorages/Web/WebObjectStorage.h @@ -82,7 +82,7 @@ public: const std::string & config_prefix, ContextPtr context) override; - ObjectStorageKey generateObjectKeyForPath(const std::string & path) const override + ObjectStorageKey generateObjectKeyForPath(const std::string & path, const std::optional & /* key_prefix */) const override { return ObjectStorageKey::createAsRelative(path); } From 27392fee6eec22c7f2dac3d17f73ab9a528f1fc8 Mon Sep 17 00:00:00 2001 From: Julia Kartseva Date: Fri, 28 Jun 2024 01:30:49 +0000 Subject: [PATCH 571/661] Minor refactor --- .../CommonPathPrefixKeyGenerator.cpp | 8 ++-- .../MetadataStorageFromPlainObjectStorage.cpp | 38 +++++++------------ .../MetadataStorageFromPlainObjectStorage.h | 8 ---- ...torageFromPlainRewritableObjectStorage.cpp | 26 +++++-------- ...aStorageFromPlainRewritableObjectStorage.h | 2 +- 5 files changed, 28 insertions(+), 54 deletions(-) diff --git a/src/Disks/ObjectStorages/CommonPathPrefixKeyGenerator.cpp b/src/Disks/ObjectStorages/CommonPathPrefixKeyGenerator.cpp index 0a4426e8e66..ef599a2f366 100644 --- a/src/Disks/ObjectStorages/CommonPathPrefixKeyGenerator.cpp +++ b/src/Disks/ObjectStorages/CommonPathPrefixKeyGenerator.cpp @@ -16,15 +16,15 @@ CommonPathPrefixKeyGenerator::CommonPathPrefixKeyGenerator( } ObjectStorageKey -CommonPathPrefixKeyGenerator::generate(const String & path, bool is_directory, const std::optional & /* key_prefix */) const +CommonPathPrefixKeyGenerator::generate(const String & path, bool is_directory, const std::optional & key_prefix) const { const auto & [object_key_prefix, suffix_parts] = getLongestObjectKeyPrefix(path); - auto key = std::filesystem::path(object_key_prefix.empty() ? std::string() : object_key_prefix); + auto key = std::filesystem::path(object_key_prefix); /// The longest prefix is the same as path, meaning that the path is already mapped. if (suffix_parts.empty()) - return ObjectStorageKey::createAsRelative(storage_key_prefix, std::move(key)); + return ObjectStorageKey::createAsRelative(key_prefix.has_value() ? *key_prefix : storage_key_prefix, std::move(key)); /// File and top-level directory paths are mapped as is. if (!is_directory || object_key_prefix.empty()) @@ -40,7 +40,7 @@ CommonPathPrefixKeyGenerator::generate(const String & path, bool is_directory, c key /= getRandomASCIIString(part_size); } - return ObjectStorageKey::createAsRelative(storage_key_prefix, key); + return ObjectStorageKey::createAsRelative(key_prefix.has_value() ? 
*key_prefix : storage_key_prefix, key); } std::tuple> CommonPathPrefixKeyGenerator::getLongestObjectKeyPrefix(const std::string & path) const diff --git a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp index 589b18abca8..02048c07a57 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp +++ b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp @@ -80,9 +80,20 @@ std::vector MetadataStorageFromPlainObjectStorage::listDirectory(co object_storage->listObjects(abs_key, files, 0); - std::unordered_set directories; - getDirectChildrenOnDisk(abs_key, object_storage->getCommonKeyPrefix(), files, path, directories); - return std::vector(std::make_move_iterator(directories.begin()), std::make_move_iterator(directories.end())); + std::unordered_set result; + for (const auto & elem : files) + { + const auto & p = elem->relative_path; + chassert(p.find(abs_key) == 0); + const auto child_pos = abs_key.size(); + /// string::npos is ok. + const auto slash_pos = p.find('/', child_pos); + if (slash_pos == std::string::npos) + result.emplace(p.substr(child_pos)); + else + result.emplace(p.substr(child_pos, slash_pos - child_pos)); + } + return std::vector(std::make_move_iterator(result.begin()), std::make_move_iterator(result.end())); } DirectoryIteratorPtr MetadataStorageFromPlainObjectStorage::iterateDirectory(const std::string & path) const @@ -102,27 +113,6 @@ StoredObjects MetadataStorageFromPlainObjectStorage::getStorageObjects(const std return {StoredObject(object_key.serialize(), path, object_size)}; } -void MetadataStorageFromPlainObjectStorage::getDirectChildrenOnDisk( - const std::string & storage_key, - const std::string & /* storage_key_perfix */, - const RelativePathsWithMetadata & remote_paths, - const std::string & /* local_path */, - std::unordered_set & result) const -{ - for (const auto & elem : remote_paths) - { - const auto & path = elem->relative_path; - chassert(path.find(storage_key) == 0); - const auto child_pos = storage_key.size(); - /// string::npos is ok. - const auto slash_pos = path.find('/', child_pos); - if (slash_pos == std::string::npos) - result.emplace(path.substr(child_pos)); - else - result.emplace(path.substr(child_pos, slash_pos - child_pos)); - } -} - const IMetadataStorage & MetadataStorageFromPlainObjectStorageTransaction::getStorageForNonTransactionalReads() const { return metadata_storage; diff --git a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.h b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.h index 97c5715a937..237327cd1f4 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.h +++ b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.h @@ -86,14 +86,6 @@ protected: /// Returns a map of local paths to paths in object storage. virtual std::shared_ptr getPathMap() const { throwNotImplemented(); } - - /// Retrieves the immediate files and directories within a given directory on a disk. 
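// The inlined child-name extraction above can be read as the following self-contained helper
// (standard library only; the function name and sample paths are illustrative): given a listed remote
// path and the directory key it was listed under, keep only the first path component after the key,
// so "store/key/a/b/data.bin" under "store/key/" yields "a".

#include <string>

std::string immediateChildName(const std::string & remote_path, const std::string & dir_key /* ends with '/' */)
{
    const auto child_pos = dir_key.size();                     // remote_path is known to start with dir_key
    const auto slash_pos = remote_path.find('/', child_pos);   // npos means a file directly inside the directory
    if (slash_pos == std::string::npos)
        return remote_path.substr(child_pos);
    return remote_path.substr(child_pos, slash_pos - child_pos);
}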
- virtual void getDirectChildrenOnDisk( - const std::string & storage_key, - const std::string & storage_key_perfix, - const RelativePathsWithMetadata & remote_paths, - const std::string & local_path, - std::unordered_set & result) const; }; class MetadataStorageFromPlainObjectStorageTransaction final : public IMetadataTransaction, private MetadataOperationsHolder diff --git a/src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.cpp b/src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.cpp index de65cd5c233..b904c0d92b9 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.cpp +++ b/src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.cpp @@ -121,7 +121,7 @@ MetadataStorageFromPlainObjectStorage::PathMap loadPathPrefixMap(const std::stri return result; } -void getDirectChildrenOnRewritableDisk( +void getDirectChildrenOnDiskImpl( const std::string & storage_key, const std::string & storage_key_perfix, const RelativePathsWithMetadata & remote_paths, @@ -215,10 +215,8 @@ bool MetadataStorageFromPlainRewritableObjectStorage::exists(const std::string & if (getMetadataKeyPrefix() != object_storage->getCommonKeyPrefix()) { - auto key_prefix = object_storage->generateObjectKeyForPath(path, std::nullopt /* key_prefix */).serialize(); - chassert(key_prefix.starts_with(object_storage->getCommonKeyPrefix())); - auto metadata_key = std::filesystem::path(getMetadataKeyPrefix()) / key_prefix.substr(object_storage->getCommonKeyPrefix().size()); - return object_storage->existsOrHasAnyChild(metadata_key); + auto key_prefix = object_storage->generateObjectKeyForPath(path, getMetadataKeyPrefix()).serialize(); + return object_storage->existsOrHasAnyChild(key_prefix); } return false; @@ -228,11 +226,8 @@ bool MetadataStorageFromPlainRewritableObjectStorage::isDirectory(const std::str { if (getMetadataKeyPrefix() != object_storage->getCommonKeyPrefix()) { - auto directory = std::filesystem::path(object_storage->generateObjectKeyForPath(path, std::nullopt /* key_prefix */).serialize()) / ""; - chassert(directory.string().starts_with(object_storage->getCommonKeyPrefix())); - auto metadata_key - = std::filesystem::path(getMetadataKeyPrefix()) / directory.string().substr(object_storage->getCommonKeyPrefix().size()); - return object_storage->existsOrHasAnyChild(metadata_key); + auto directory = std::filesystem::path(object_storage->generateObjectKeyForPath(path, getMetadataKeyPrefix()).serialize()) / ""; + return object_storage->existsOrHasAnyChild(directory); } else return MetadataStorageFromPlainObjectStorage::isDirectory(path); @@ -240,12 +235,10 @@ bool MetadataStorageFromPlainRewritableObjectStorage::isDirectory(const std::str std::vector MetadataStorageFromPlainRewritableObjectStorage::listDirectory(const std::string & path) const { - auto key_prefix = object_storage->generateObjectKeyForPath(path, std::nullopt /* key_prefix */).serialize(); + auto key_prefix = object_storage->generateObjectKeyForPath(path, "" /* key_prefix */).serialize(); RelativePathsWithMetadata files; - std::string abs_key = key_prefix; - if (!abs_key.ends_with('/')) - abs_key += '/'; + auto abs_key = std::filesystem::path(object_storage->getCommonKeyPrefix()) / key_prefix / ""; object_storage->listObjects(abs_key, files, 0); @@ -255,8 +248,7 @@ std::vector MetadataStorageFromPlainRewritableObjectStorage::listDi /// metadata along with regular files. 
if (object_storage->getCommonKeyPrefix() != getMetadataKeyPrefix()) { - chassert(abs_key.starts_with(object_storage->getCommonKeyPrefix())); - auto metadata_key = std::filesystem::path(getMetadataKeyPrefix()) / abs_key.substr(object_storage->getCommonKeyPrefix().size()); + auto metadata_key = std::filesystem::path(getMetadataKeyPrefix()) / key_prefix / ""; RelativePathsWithMetadata metadata_files; object_storage->listObjects(metadata_key, metadata_files, 0); getDirectChildrenOnDisk(metadata_key, getMetadataKeyPrefix(), metadata_files, path, directories); @@ -272,7 +264,7 @@ void MetadataStorageFromPlainRewritableObjectStorage::getDirectChildrenOnDisk( const std::string & local_path, std::unordered_set & result) const { - getDirectChildrenOnRewritableDisk(storage_key, storage_key_perfix, remote_paths, local_path, *getPathMap(), metadata_mutex, result); + getDirectChildrenOnDiskImpl(storage_key, storage_key_perfix, remote_paths, local_path, *getPathMap(), metadata_mutex, result); } } diff --git a/src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.h b/src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.h index 71153cbdc25..b067b391878 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.h +++ b/src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.h @@ -33,7 +33,7 @@ protected: const std::string & storage_key_perfix, const RelativePathsWithMetadata & remote_paths, const std::string & local_path, - std::unordered_set & result) const override; + std::unordered_set & result) const; }; } From ecca720f9e076e49f280e48c6ff4046a19894b2a Mon Sep 17 00:00:00 2001 From: Julia Kartseva Date: Fri, 28 Jun 2024 06:42:38 +0000 Subject: [PATCH 572/661] minor --- .../ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp index 02048c07a57..7553c7733b5 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp +++ b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp @@ -145,11 +145,10 @@ void MetadataStorageFromPlainObjectStorageTransaction::createDirectory(const std return; auto normalized_path = normalizeDirectoryPath(path); - auto key_prefix = object_storage->generateObjectKeyPrefixForDirectoryPath(normalized_path, std::nullopt /* key_prefix */).serialize(); - chassert(key_prefix.starts_with(object_storage->getCommonKeyPrefix())); + auto key_prefix = object_storage->generateObjectKeyPrefixForDirectoryPath(normalized_path, "" /* key_prefix */).serialize(); auto op = std::make_unique( std::move(normalized_path), - key_prefix.substr(object_storage->getCommonKeyPrefix().size()), + std::move(key_prefix), *metadata_storage.getPathMap(), object_storage, metadata_storage.getMetadataKeyPrefix()); From 97519ae800b9a26942973d888354a2b013d53cc6 Mon Sep 17 00:00:00 2001 From: Julia Kartseva Date: Thu, 4 Jul 2024 07:02:13 +0000 Subject: [PATCH 573/661] in-memory map path comparator --- .../CommonPathPrefixKeyGenerator.cpp | 5 +++-- .../CommonPathPrefixKeyGenerator.h | 4 +++- .../MetadataStorageFromPlainObjectStorage.h | 3 ++- ...torageFromPlainObjectStorageOperations.cpp | 18 ++++++++--------- ...torageFromPlainRewritableObjectStorage.cpp | 9 ++++----- src/Disks/ObjectStorages/PathComparator.h | 20 +++++++++++++++++++ 6 files changed, 41 insertions(+), 18 deletions(-) create mode 100644 
src/Disks/ObjectStorages/PathComparator.h diff --git a/src/Disks/ObjectStorages/CommonPathPrefixKeyGenerator.cpp b/src/Disks/ObjectStorages/CommonPathPrefixKeyGenerator.cpp index ef599a2f366..062a2542654 100644 --- a/src/Disks/ObjectStorages/CommonPathPrefixKeyGenerator.cpp +++ b/src/Disks/ObjectStorages/CommonPathPrefixKeyGenerator.cpp @@ -18,7 +18,8 @@ CommonPathPrefixKeyGenerator::CommonPathPrefixKeyGenerator( ObjectStorageKey CommonPathPrefixKeyGenerator::generate(const String & path, bool is_directory, const std::optional & key_prefix) const { - const auto & [object_key_prefix, suffix_parts] = getLongestObjectKeyPrefix(path); + const auto & [object_key_prefix, suffix_parts] + = getLongestObjectKeyPrefix(is_directory ? std::filesystem::path(path).parent_path().string() : path); auto key = std::filesystem::path(object_key_prefix); @@ -54,7 +55,7 @@ std::tuple> CommonPathPrefixKeyGenerator:: while (p != p.root_path()) { - auto it = ptr->find(p / ""); + auto it = ptr->find(p); if (it != ptr->end()) { std::vector vec(std::make_move_iterator(dq.begin()), std::make_move_iterator(dq.end())); diff --git a/src/Disks/ObjectStorages/CommonPathPrefixKeyGenerator.h b/src/Disks/ObjectStorages/CommonPathPrefixKeyGenerator.h index 08495738505..bca4f7060c4 100644 --- a/src/Disks/ObjectStorages/CommonPathPrefixKeyGenerator.h +++ b/src/Disks/ObjectStorages/CommonPathPrefixKeyGenerator.h @@ -3,6 +3,8 @@ #include #include +#include + #include #include #include @@ -23,7 +25,7 @@ class CommonPathPrefixKeyGenerator : public IObjectStorageKeysGenerator { public: /// Local to remote path map. Leverages filesystem::path comparator for paths. - using PathMap = std::map; + using PathMap = std::map; explicit CommonPathPrefixKeyGenerator(String key_prefix_, SharedMutex & shared_mutex_, std::weak_ptr path_map_); diff --git a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.h b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.h index 237327cd1f4..9ea1c475821 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.h +++ b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.h @@ -4,6 +4,7 @@ #include #include #include +#include #include #include @@ -29,7 +30,7 @@ class MetadataStorageFromPlainObjectStorage : public IMetadataStorage { public: /// Local path prefixes mapped to storage key prefixes. 
- using PathMap = std::map; + using PathMap = std::map; private: friend class MetadataStorageFromPlainObjectStorageTransaction; diff --git a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.cpp b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.cpp index 0a6086bd39d..b0b384f62c7 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.cpp +++ b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.cpp @@ -43,7 +43,7 @@ MetadataStorageFromPlainObjectStorageCreateDirectoryOperation::MetadataStorageFr void MetadataStorageFromPlainObjectStorageCreateDirectoryOperation::execute(std::unique_lock &) { - if (path_map.contains(path)) + if (path_map.contains(path.parent_path())) return; auto metadata_object_key = createMetadataObjectKey(key_prefix, metadata_key_prefix); @@ -64,7 +64,7 @@ void MetadataStorageFromPlainObjectStorageCreateDirectoryOperation::execute(std: write_created = true; - [[maybe_unused]] auto result = path_map.emplace(path, std::move(key_prefix)); + [[maybe_unused]] auto result = path_map.emplace(path.parent_path(), std::move(key_prefix)); chassert(result.second); auto metric = object_storage->getMetadataStorageMetrics().directory_map_size; CurrentMetrics::add(metric, 1); @@ -84,7 +84,7 @@ void MetadataStorageFromPlainObjectStorageCreateDirectoryOperation::undo(std::un if (write_finalized) { - path_map.erase(path); + path_map.erase(path.parent_path()); auto metric = object_storage->getMetadataStorageMetrics().directory_map_size; CurrentMetrics::sub(metric, 1); @@ -111,11 +111,11 @@ MetadataStorageFromPlainObjectStorageMoveDirectoryOperation::MetadataStorageFrom std::unique_ptr MetadataStorageFromPlainObjectStorageMoveDirectoryOperation::createWriteBuf( const std::filesystem::path & expected_path, const std::filesystem::path & new_path, bool validate_content) { - auto expected_it = path_map.find(expected_path); + auto expected_it = path_map.find(expected_path.parent_path()); if (expected_it == path_map.end()) throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "Metadata object for the expected (source) path '{}' does not exist", expected_path); - if (path_map.contains(new_path)) + if (path_map.contains(new_path.parent_path())) throw Exception(ErrorCodes::FILE_ALREADY_EXISTS, "Metadata object for the new (destination) path '{}' already exists", new_path); auto metadata_object_key = createMetadataObjectKey(expected_it->second, metadata_key_prefix); @@ -156,7 +156,7 @@ void MetadataStorageFromPlainObjectStorageMoveDirectoryOperation::execute(std::u writeString(path_to.string(), *write_buf); write_buf->finalize(); - [[maybe_unused]] auto result = path_map.emplace(path_to, path_map.extract(path_from).mapped()); + [[maybe_unused]] auto result = path_map.emplace(path_to.parent_path(), path_map.extract(path_from.parent_path()).mapped()); chassert(result.second); write_finalized = true; @@ -165,7 +165,7 @@ void MetadataStorageFromPlainObjectStorageMoveDirectoryOperation::execute(std::u void MetadataStorageFromPlainObjectStorageMoveDirectoryOperation::undo(std::unique_lock &) { if (write_finalized) - path_map.emplace(path_from, path_map.extract(path_to).mapped()); + path_map.emplace(path_from.parent_path(), path_map.extract(path_to.parent_path()).mapped()); if (write_created) { @@ -186,7 +186,7 @@ MetadataStorageFromPlainObjectStorageRemoveDirectoryOperation::MetadataStorageFr void MetadataStorageFromPlainObjectStorageRemoveDirectoryOperation::execute(std::unique_lock & /* metadata_lock */) { - auto 
path_it = path_map.find(path); + auto path_it = path_map.find(path.parent_path()); if (path_it == path_map.end()) return; @@ -223,7 +223,7 @@ void MetadataStorageFromPlainObjectStorageRemoveDirectoryOperation::undo(std::un writeString(path.string(), *buf); buf->finalize(); - path_map.emplace(path, std::move(key_prefix)); + path_map.emplace(path.parent_path(), std::move(key_prefix)); auto metric = object_storage->getMetadataStorageMetrics().directory_map_size; CurrentMetrics::add(metric, 1); } diff --git a/src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.cpp b/src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.cpp index b904c0d92b9..ba8dfc891dd 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.cpp +++ b/src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.cpp @@ -97,7 +97,7 @@ MetadataStorageFromPlainObjectStorage::PathMap loadPathPrefixMap(const std::stri std::pair res; { std::lock_guard lock(mutex); - res = result.emplace(local_path, remote_path.parent_path()); + res = result.emplace(std::filesystem::path(local_path).parent_path(), remote_path.parent_path()); } /// This can happen if table replication is enabled, then the same local path is written @@ -145,11 +145,10 @@ void getDirectChildrenOnDiskImpl( break; auto slash_num = count(k.begin() + local_path.size(), k.end(), '/'); - if (slash_num != 1) + if (slash_num != 0) continue; - chassert(k.back() == '/'); - remote_to_local_subdir.emplace(v, std::string(k.begin() + local_path.size(), k.end() - 1)); + remote_to_local_subdir.emplace(v, std::string(k.begin() + local_path.size(), k.end()) + "/"); } } @@ -243,7 +242,7 @@ std::vector MetadataStorageFromPlainRewritableObjectStorage::listDi object_storage->listObjects(abs_key, files, 0); std::unordered_set directories; - getDirectChildrenOnDisk(abs_key, object_storage->getCommonKeyPrefix(), files, path, directories); + getDirectChildrenOnDisk(abs_key, object_storage->getCommonKeyPrefix(), files, std::filesystem::path(path) / "", directories); /// List empty directories that are identified by the `prefix.path` metadata files. This is required to, e.g., remove /// metadata along with regular files. 
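// The parent_path() calls added above lean on a std::filesystem behaviour that is easy to miss:
// for a directory path written with a trailing slash, parent_path() merely drops the slash, so the
// in-memory map ends up keyed by the directory itself without a trailing separator (assuming, as the
// surrounding code suggests, that directory paths arrive normalized with a trailing '/').

#include <cassert>
#include <filesystem>

void parentPathExample()
{
    namespace fs = std::filesystem;
    assert(fs::path("store/tbl/part_1/").parent_path() == fs::path("store/tbl/part_1"));
    assert(fs::path("store/tbl/part_1/data.bin").parent_path() == fs::path("store/tbl/part_1"));
}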
if (object_storage->getCommonKeyPrefix() != getMetadataKeyPrefix()) diff --git a/src/Disks/ObjectStorages/PathComparator.h b/src/Disks/ObjectStorages/PathComparator.h new file mode 100644 index 00000000000..fe97a465937 --- /dev/null +++ b/src/Disks/ObjectStorages/PathComparator.h @@ -0,0 +1,20 @@ +#pragma once + +#include + +namespace DB +{ +// TODO: rename +struct PathComparator +{ + bool operator()(const std::filesystem::path & path1, const std::filesystem::path & path2) const + { + auto d1 = std::distance(path1.begin(), path1.end()); + auto d2 = std::distance(path2.begin(), path2.end()); + if (d1 != d2) + return d1 < d2; + return path1 < path2; + } +}; + +} From aa290b6398a5affa8405d3584795bce6bf7450d4 Mon Sep 17 00:00:00 2001 From: Julia Kartseva Date: Wed, 10 Jul 2024 01:15:57 +0000 Subject: [PATCH 574/661] use a designated mutex for path_map --- .../CommonPathPrefixKeyGenerator.cpp | 13 ++- .../CommonPathPrefixKeyGenerator.h | 6 +- .../MetadataStorageFromPlainObjectStorage.h | 6 +- ...torageFromPlainObjectStorageOperations.cpp | 99 ++++++++++++++----- ...aStorageFromPlainObjectStorageOperations.h | 13 +-- ...torageFromPlainRewritableObjectStorage.cpp | 25 ++--- ...aStorageFromPlainRewritableObjectStorage.h | 4 +- src/Disks/ObjectStorages/PathComparator.h | 27 +++-- 8 files changed, 122 insertions(+), 71 deletions(-) diff --git a/src/Disks/ObjectStorages/CommonPathPrefixKeyGenerator.cpp b/src/Disks/ObjectStorages/CommonPathPrefixKeyGenerator.cpp index 062a2542654..19dd819fc17 100644 --- a/src/Disks/ObjectStorages/CommonPathPrefixKeyGenerator.cpp +++ b/src/Disks/ObjectStorages/CommonPathPrefixKeyGenerator.cpp @@ -1,4 +1,5 @@ #include "CommonPathPrefixKeyGenerator.h" +#include "Disks/ObjectStorages/PathComparator.h" #include @@ -9,9 +10,8 @@ namespace DB { -CommonPathPrefixKeyGenerator::CommonPathPrefixKeyGenerator( - String key_prefix_, SharedMutex & shared_mutex_, std::weak_ptr path_map_) - : storage_key_prefix(key_prefix_), shared_mutex(shared_mutex_), path_map(std::move(path_map_)) +CommonPathPrefixKeyGenerator::CommonPathPrefixKeyGenerator(String key_prefix_, std::weak_ptr path_map_) + : storage_key_prefix(key_prefix_), path_map(std::move(path_map_)) { } @@ -49,14 +49,13 @@ std::tuple> CommonPathPrefixKeyGenerator:: std::filesystem::path p(path); std::deque dq; - std::shared_lock lock(shared_mutex); - auto ptr = path_map.lock(); + std::shared_lock lock(ptr->mutex); while (p != p.root_path()) { - auto it = ptr->find(p); - if (it != ptr->end()) + auto it = ptr->map.find(p); + if (it != ptr->map.end()) { std::vector vec(std::make_move_iterator(dq.begin()), std::make_move_iterator(dq.end())); return std::make_tuple(it->second, std::move(vec)); diff --git a/src/Disks/ObjectStorages/CommonPathPrefixKeyGenerator.h b/src/Disks/ObjectStorages/CommonPathPrefixKeyGenerator.h index bca4f7060c4..e337745b627 100644 --- a/src/Disks/ObjectStorages/CommonPathPrefixKeyGenerator.h +++ b/src/Disks/ObjectStorages/CommonPathPrefixKeyGenerator.h @@ -25,9 +25,8 @@ class CommonPathPrefixKeyGenerator : public IObjectStorageKeysGenerator { public: /// Local to remote path map. Leverages filesystem::path comparator for paths. 
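// The comparator introduced above orders paths by component count first and only then lexicographically,
// so a directory always sorts before anything nested underneath it. A self-contained illustration
// (standard library only; the struct mirrors the one added in this series, the alias name is illustrative):

#include <filesystem>
#include <iterator>
#include <map>
#include <string>

struct ByDepthThenLexicographic
{
    bool operator()(const std::filesystem::path & a, const std::filesystem::path & b) const
    {
        auto da = std::distance(a.begin(), a.end());
        auto db = std::distance(b.begin(), b.end());
        return da != db ? da < db : a < b;
    }
};

// With this ordering: "store" < "store/tbl" < "store/aaa/bbb",
// although "store/aaa/bbb" < "store/tbl" lexicographically.
using LocalToRemote = std::map<std::filesystem::path, std::string, ByDepthThenLexicographic>;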
- using PathMap = std::map; - explicit CommonPathPrefixKeyGenerator(String key_prefix_, SharedMutex & shared_mutex_, std::weak_ptr path_map_); + explicit CommonPathPrefixKeyGenerator(String key_prefix_, std::weak_ptr path_map_); ObjectStorageKey generate(const String & path, bool is_directory, const std::optional & key_prefix) const override; @@ -37,8 +36,7 @@ private: const String storage_key_prefix; - SharedMutex & shared_mutex; - std::weak_ptr path_map; + std::weak_ptr path_map; }; } diff --git a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.h b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.h index 9ea1c475821..dfb9632666c 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.h +++ b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.h @@ -28,10 +28,6 @@ using UnlinkMetadataFileOperationOutcomePtr = std::shared_ptr; - private: friend class MetadataStorageFromPlainObjectStorageTransaction; @@ -86,7 +82,7 @@ protected: virtual std::string getMetadataKeyPrefix() const { return object_storage->getCommonKeyPrefix(); } /// Returns a map of local paths to paths in object storage. - virtual std::shared_ptr getPathMap() const { throwNotImplemented(); } + virtual std::shared_ptr getPathMap() const { throwNotImplemented(); } }; class MetadataStorageFromPlainObjectStorageTransaction final : public IMetadataTransaction, private MetadataOperationsHolder diff --git a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.cpp b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.cpp index b0b384f62c7..b4a85efbaab 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.cpp +++ b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.cpp @@ -1,4 +1,5 @@ #include "MetadataStorageFromPlainObjectStorageOperations.h" +#include "Disks/ObjectStorages/PathComparator.h" #include #include @@ -30,7 +31,7 @@ ObjectStorageKey createMetadataObjectKey(const std::string & key_prefix, const s MetadataStorageFromPlainObjectStorageCreateDirectoryOperation::MetadataStorageFromPlainObjectStorageCreateDirectoryOperation( std::filesystem::path && path_, std::string && key_prefix_, - MetadataStorageFromPlainObjectStorage::PathMap & path_map_, + InMemoryPathMap & path_map_, ObjectStoragePtr object_storage_, const std::string & metadata_key_prefix_) : path(std::move(path_)) @@ -43,8 +44,13 @@ MetadataStorageFromPlainObjectStorageCreateDirectoryOperation::MetadataStorageFr void MetadataStorageFromPlainObjectStorageCreateDirectoryOperation::execute(std::unique_lock &) { - if (path_map.contains(path.parent_path())) - return; + auto & map = path_map.map; + auto & mutex = path_map.mutex; + { + std::shared_lock lock(mutex); + if (map.contains(path.parent_path())) + return; + } auto metadata_object_key = createMetadataObjectKey(key_prefix, metadata_key_prefix); @@ -64,8 +70,11 @@ void MetadataStorageFromPlainObjectStorageCreateDirectoryOperation::execute(std: write_created = true; - [[maybe_unused]] auto result = path_map.emplace(path.parent_path(), std::move(key_prefix)); - chassert(result.second); + { + std::unique_lock lock(mutex); + [[maybe_unused]] auto result = map.emplace(path.parent_path(), std::move(key_prefix)); + chassert(result.second); + } auto metric = object_storage->getMetadataStorageMetrics().directory_map_size; CurrentMetrics::add(metric, 1); @@ -80,11 +89,17 @@ void MetadataStorageFromPlainObjectStorageCreateDirectoryOperation::execute(std: void 
MetadataStorageFromPlainObjectStorageCreateDirectoryOperation::undo(std::unique_lock &) { + auto & map = path_map.map; + auto & mutex = path_map.mutex; + auto metadata_object_key = createMetadataObjectKey(key_prefix, metadata_key_prefix); if (write_finalized) { - path_map.erase(path.parent_path()); + { + std::unique_lock lock(mutex); + map.erase(path.parent_path()); + } auto metric = object_storage->getMetadataStorageMetrics().directory_map_size; CurrentMetrics::sub(metric, 1); @@ -97,7 +112,7 @@ void MetadataStorageFromPlainObjectStorageCreateDirectoryOperation::undo(std::un MetadataStorageFromPlainObjectStorageMoveDirectoryOperation::MetadataStorageFromPlainObjectStorageMoveDirectoryOperation( std::filesystem::path && path_from_, std::filesystem::path && path_to_, - MetadataStorageFromPlainObjectStorage::PathMap & path_map_, + InMemoryPathMap & path_map_, ObjectStoragePtr object_storage_, const std::string & metadata_key_prefix_) : path_from(std::move(path_from_)) @@ -111,14 +126,25 @@ MetadataStorageFromPlainObjectStorageMoveDirectoryOperation::MetadataStorageFrom std::unique_ptr MetadataStorageFromPlainObjectStorageMoveDirectoryOperation::createWriteBuf( const std::filesystem::path & expected_path, const std::filesystem::path & new_path, bool validate_content) { - auto expected_it = path_map.find(expected_path.parent_path()); - if (expected_it == path_map.end()) - throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "Metadata object for the expected (source) path '{}' does not exist", expected_path); + auto & map = path_map.map; + auto & mutex = path_map.mutex; - if (path_map.contains(new_path.parent_path())) - throw Exception(ErrorCodes::FILE_ALREADY_EXISTS, "Metadata object for the new (destination) path '{}' already exists", new_path); + std::filesystem::path remote_path; + { + std::shared_lock lock(mutex); + auto expected_it = map.find(expected_path.parent_path()); + if (expected_it == map.end()) + throw Exception( + ErrorCodes::FILE_DOESNT_EXIST, "Metadata object for the expected (source) path '{}' does not exist", expected_path); - auto metadata_object_key = createMetadataObjectKey(expected_it->second, metadata_key_prefix); + if (map.contains(new_path.parent_path())) + throw Exception( + ErrorCodes::FILE_ALREADY_EXISTS, "Metadata object for the new (destination) path '{}' already exists", new_path); + + remote_path = expected_it->second; + } + + auto metadata_object_key = createMetadataObjectKey(remote_path, metadata_key_prefix); auto metadata_object = StoredObject(metadata_object_key.serialize(), expected_path / PREFIX_PATH_FILE_NAME); @@ -156,8 +182,13 @@ void MetadataStorageFromPlainObjectStorageMoveDirectoryOperation::execute(std::u writeString(path_to.string(), *write_buf); write_buf->finalize(); - [[maybe_unused]] auto result = path_map.emplace(path_to.parent_path(), path_map.extract(path_from.parent_path()).mapped()); - chassert(result.second); + auto & map = path_map.map; + auto & mutex = path_map.mutex; + { + std::unique_lock lock(mutex); + [[maybe_unused]] auto result = map.emplace(path_to.parent_path(), map.extract(path_from.parent_path()).mapped()); + chassert(result.second); + } write_finalized = true; } @@ -165,7 +196,12 @@ void MetadataStorageFromPlainObjectStorageMoveDirectoryOperation::execute(std::u void MetadataStorageFromPlainObjectStorageMoveDirectoryOperation::undo(std::unique_lock &) { if (write_finalized) - path_map.emplace(path_from.parent_path(), path_map.extract(path_to.parent_path()).mapped()); + { + auto & map = path_map.map; + auto & mutex = 
path_map.mutex; + std::unique_lock lock(mutex); + map.emplace(path_from.parent_path(), map.extract(path_to.parent_path()).mapped()); + } if (write_created) { @@ -176,28 +212,34 @@ void MetadataStorageFromPlainObjectStorageMoveDirectoryOperation::undo(std::uniq } MetadataStorageFromPlainObjectStorageRemoveDirectoryOperation::MetadataStorageFromPlainObjectStorageRemoveDirectoryOperation( - std::filesystem::path && path_, - MetadataStorageFromPlainObjectStorage::PathMap & path_map_, - ObjectStoragePtr object_storage_, - const std::string & metadata_key_prefix_) + std::filesystem::path && path_, InMemoryPathMap & path_map_, ObjectStoragePtr object_storage_, const std::string & metadata_key_prefix_) : path(std::move(path_)), path_map(path_map_), object_storage(object_storage_), metadata_key_prefix(metadata_key_prefix_) { } void MetadataStorageFromPlainObjectStorageRemoveDirectoryOperation::execute(std::unique_lock & /* metadata_lock */) { - auto path_it = path_map.find(path.parent_path()); - if (path_it == path_map.end()) - return; + auto & map = path_map.map; + auto & mutex = path_map.mutex; + { + std::shared_lock lock(mutex); + auto path_it = map.find(path.parent_path()); + if (path_it == map.end()) + return; + key_prefix = path_it->second; + } LOG_TRACE(getLogger("MetadataStorageFromPlainObjectStorageRemoveDirectoryOperation"), "Removing directory '{}'", path); - key_prefix = path_it->second; auto metadata_object_key = createMetadataObjectKey(key_prefix, metadata_key_prefix); auto metadata_object = StoredObject(metadata_object_key.serialize(), path / PREFIX_PATH_FILE_NAME); object_storage->removeObject(metadata_object); - path_map.erase(path_it); + { + std::unique_lock lock(mutex); + map.erase(path.parent_path()); + } + auto metric = object_storage->getMetadataStorageMetrics().directory_map_size; CurrentMetrics::sub(metric, 1); @@ -223,7 +265,12 @@ void MetadataStorageFromPlainObjectStorageRemoveDirectoryOperation::undo(std::un writeString(path.string(), *buf); buf->finalize(); - path_map.emplace(path.parent_path(), std::move(key_prefix)); + auto & map = path_map.map; + auto & mutex = path_map.mutex; + { + std::unique_lock lock(mutex); + map.emplace(path.parent_path(), std::move(key_prefix)); + } auto metric = object_storage->getMetadataStorageMetrics().directory_map_size; CurrentMetrics::add(metric, 1); } diff --git a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.h b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.h index e31e3cbb262..1b2471dd316 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.h +++ b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.h @@ -2,6 +2,7 @@ #include #include +#include "Disks/ObjectStorages/PathComparator.h" #include #include @@ -14,7 +15,7 @@ class MetadataStorageFromPlainObjectStorageCreateDirectoryOperation final : publ private: std::filesystem::path path; std::string key_prefix; - MetadataStorageFromPlainObjectStorage::PathMap & path_map; + InMemoryPathMap & path_map; ObjectStoragePtr object_storage; const std::string metadata_key_prefix; @@ -26,7 +27,7 @@ public: MetadataStorageFromPlainObjectStorageCreateDirectoryOperation( std::filesystem::path && path_, std::string && key_prefix_, - MetadataStorageFromPlainObjectStorage::PathMap & path_map_, + InMemoryPathMap & path_map_, ObjectStoragePtr object_storage_, const std::string & metadata_key_prefix_); @@ -39,7 +40,7 @@ class MetadataStorageFromPlainObjectStorageMoveDirectoryOperation final : public 
private: std::filesystem::path path_from; std::filesystem::path path_to; - MetadataStorageFromPlainObjectStorage::PathMap & path_map; + InMemoryPathMap & path_map; ObjectStoragePtr object_storage; const std::string metadata_key_prefix; @@ -53,7 +54,7 @@ public: MetadataStorageFromPlainObjectStorageMoveDirectoryOperation( std::filesystem::path && path_from_, std::filesystem::path && path_to_, - MetadataStorageFromPlainObjectStorage::PathMap & path_map_, + InMemoryPathMap & path_map_, ObjectStoragePtr object_storage_, const std::string & metadata_key_prefix_); @@ -67,7 +68,7 @@ class MetadataStorageFromPlainObjectStorageRemoveDirectoryOperation final : publ private: std::filesystem::path path; - MetadataStorageFromPlainObjectStorage::PathMap & path_map; + InMemoryPathMap & path_map; ObjectStoragePtr object_storage; const std::string metadata_key_prefix; @@ -77,7 +78,7 @@ private: public: MetadataStorageFromPlainObjectStorageRemoveDirectoryOperation( std::filesystem::path && path_, - MetadataStorageFromPlainObjectStorage::PathMap & path_map_, + InMemoryPathMap & path_map_, ObjectStoragePtr object_storage_, const std::string & metadata_key_prefix_); diff --git a/src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.cpp b/src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.cpp index ba8dfc891dd..cf51a6a5314 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.cpp +++ b/src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.cpp @@ -3,11 +3,12 @@ #include #include -#include #include +#include #include #include #include "CommonPathPrefixKeyGenerator.h" +#include "Disks/ObjectStorages/PathComparator.h" namespace DB @@ -37,9 +38,10 @@ std::string getMetadataKeyPrefix(ObjectStoragePtr object_storage) : metadata_key_prefix; } -MetadataStorageFromPlainObjectStorage::PathMap loadPathPrefixMap(const std::string & metadata_key_prefix, ObjectStoragePtr object_storage) +InMemoryPathMap::Map loadPathPrefixMap(const std::string & metadata_key_prefix, ObjectStoragePtr object_storage) { - MetadataStorageFromPlainObjectStorage::PathMap result; + using Map = InMemoryPathMap::Map; + Map result; ThreadPool & pool = getIOThreadPool().get(); ThreadPoolCallbackRunnerLocal runner(pool, "PlainRWMetaLoad"); @@ -94,7 +96,7 @@ MetadataStorageFromPlainObjectStorage::PathMap loadPathPrefixMap(const std::stri chassert(remote_metadata_path.string().starts_with(metadata_key_prefix)); auto suffix = remote_metadata_path.string().substr(metadata_key_prefix.size()); auto remote_path = std::filesystem::path(std::move(suffix)); - std::pair res; + std::pair res; { std::lock_guard lock(mutex); res = result.emplace(std::filesystem::path(local_path).parent_path(), remote_path.parent_path()); @@ -126,14 +128,13 @@ void getDirectChildrenOnDiskImpl( const std::string & storage_key_perfix, const RelativePathsWithMetadata & remote_paths, const std::string & local_path, - const MetadataStorageFromPlainObjectStorage::PathMap & local_path_prefixes, + const InMemoryPathMap::Map & local_path_prefixes, SharedMutex & shared_mutex, std::unordered_set & result) { - using PathMap = MetadataStorageFromPlainObjectStorage::PathMap; - /// Map remote paths into local subdirectories. 
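// The operations above now follow a simple reader/writer discipline around the map and the mutex that
// lives next to it: shared lock for lookups, exclusive lock for mutation. A self-contained sketch of
// that pattern (std::shared_mutex stands in for DB::SharedMutex; names are illustrative):

#include <filesystem>
#include <map>
#include <mutex>
#include <optional>
#include <shared_mutex>
#include <string>

struct PathMapWithMutex
{
    std::map<std::filesystem::path, std::string> map;
    mutable std::shared_mutex mutex;
};

std::optional<std::string> lookupRemote(const PathMapWithMutex & m, const std::filesystem::path & dir)
{
    std::shared_lock lock(m.mutex);      // concurrent readers are fine
    auto it = m.map.find(dir);
    if (it == m.map.end())
        return std::nullopt;
    return it->second;
}

void recordDirectory(PathMapWithMutex & m, std::filesystem::path dir, std::string remote)
{
    std::unique_lock lock(m.mutex);      // exclusive access while mutating
    m.map.emplace(std::move(dir), std::move(remote));
}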
- std::unordered_map remote_to_local_subdir; + using Map = InMemoryPathMap::Map; + std::unordered_map remote_to_local_subdir; { std::shared_lock lock(shared_mutex); @@ -189,7 +190,7 @@ MetadataStorageFromPlainRewritableObjectStorage::MetadataStorageFromPlainRewrita ObjectStoragePtr object_storage_, String storage_path_prefix_) : MetadataStorageFromPlainObjectStorage(object_storage_, storage_path_prefix_) , metadata_key_prefix(DB::getMetadataKeyPrefix(object_storage)) - , path_map(std::make_shared(loadPathPrefixMap(metadata_key_prefix, object_storage))) + , path_map(std::make_shared(loadPathPrefixMap(metadata_key_prefix, object_storage))) { if (object_storage->isWriteOnce()) throw Exception( @@ -197,14 +198,14 @@ MetadataStorageFromPlainRewritableObjectStorage::MetadataStorageFromPlainRewrita "MetadataStorageFromPlainRewritableObjectStorage is not compatible with write-once storage '{}'", object_storage->getName()); - auto keys_gen = std::make_shared(object_storage->getCommonKeyPrefix(), metadata_mutex, path_map); + auto keys_gen = std::make_shared(object_storage->getCommonKeyPrefix(), path_map); object_storage->setKeysGenerator(keys_gen); } MetadataStorageFromPlainRewritableObjectStorage::~MetadataStorageFromPlainRewritableObjectStorage() { auto metric = object_storage->getMetadataStorageMetrics().directory_map_size; - CurrentMetrics::sub(metric, path_map->size()); + CurrentMetrics::sub(metric, path_map->map.size()); } bool MetadataStorageFromPlainRewritableObjectStorage::exists(const std::string & path) const @@ -263,7 +264,7 @@ void MetadataStorageFromPlainRewritableObjectStorage::getDirectChildrenOnDisk( const std::string & local_path, std::unordered_set & result) const { - getDirectChildrenOnDiskImpl(storage_key, storage_key_perfix, remote_paths, local_path, *getPathMap(), metadata_mutex, result); + getDirectChildrenOnDiskImpl(storage_key, storage_key_perfix, remote_paths, local_path, getPathMap()->map, getPathMap()->mutex, result); } } diff --git a/src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.h b/src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.h index b067b391878..fea461abab8 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.h +++ b/src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.h @@ -13,7 +13,7 @@ class MetadataStorageFromPlainRewritableObjectStorage final : public MetadataSto { private: const std::string metadata_key_prefix; - std::shared_ptr path_map; + std::shared_ptr path_map; public: MetadataStorageFromPlainRewritableObjectStorage(ObjectStoragePtr object_storage_, String storage_path_prefix_); @@ -27,7 +27,7 @@ public: protected: std::string getMetadataKeyPrefix() const override { return metadata_key_prefix; } - std::shared_ptr getPathMap() const override { return path_map; } + std::shared_ptr getPathMap() const override { return path_map; } void getDirectChildrenOnDisk( const std::string & storage_key, const std::string & storage_key_perfix, diff --git a/src/Disks/ObjectStorages/PathComparator.h b/src/Disks/ObjectStorages/PathComparator.h index fe97a465937..fae82108789 100644 --- a/src/Disks/ObjectStorages/PathComparator.h +++ b/src/Disks/ObjectStorages/PathComparator.h @@ -1,20 +1,29 @@ #pragma once #include +#include +#include "Common/SharedMutex.h" namespace DB { -// TODO: rename -struct PathComparator + + +struct InMemoryPathMap { - bool operator()(const std::filesystem::path & path1, const std::filesystem::path & path2) const + struct PathComparator { - auto 
d1 = std::distance(path1.begin(), path1.end()); - auto d2 = std::distance(path2.begin(), path2.end()); - if (d1 != d2) - return d1 < d2; - return path1 < path2; - } + bool operator()(const std::filesystem::path & path1, const std::filesystem::path & path2) const + { + auto d1 = std::distance(path1.begin(), path1.end()); + auto d2 = std::distance(path2.begin(), path2.end()); + if (d1 != d2) + return d1 < d2; + return path1 < path2; + } + }; + using Map = std::map; + Map map; + SharedMutex mutex; }; } From 0e78ed6b580646cc08721eef415ffb3fe2f697cb Mon Sep 17 00:00:00 2001 From: Julia Kartseva Date: Wed, 10 Jul 2024 23:04:21 +0000 Subject: [PATCH 575/661] simplify listDirectory --- ...torageFromPlainRewritableObjectStorage.cpp | 28 ++++--------------- ...aStorageFromPlainRewritableObjectStorage.h | 1 - 2 files changed, 5 insertions(+), 24 deletions(-) diff --git a/src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.cpp b/src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.cpp index cf51a6a5314..6a0eff0a136 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.cpp +++ b/src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.cpp @@ -125,17 +125,12 @@ InMemoryPathMap::Map loadPathPrefixMap(const std::string & metadata_key_prefix, void getDirectChildrenOnDiskImpl( const std::string & storage_key, - const std::string & storage_key_perfix, const RelativePathsWithMetadata & remote_paths, const std::string & local_path, const InMemoryPathMap::Map & local_path_prefixes, SharedMutex & shared_mutex, std::unordered_set & result) { - /// Map remote paths into local subdirectories. - using Map = InMemoryPathMap::Map; - std::unordered_map remote_to_local_subdir; - { std::shared_lock lock(shared_mutex); auto end_it = local_path_prefixes.end(); @@ -147,9 +142,9 @@ void getDirectChildrenOnDiskImpl( auto slash_num = count(k.begin() + local_path.size(), k.end(), '/'); if (slash_num != 0) - continue; + break; - remote_to_local_subdir.emplace(v, std::string(k.begin() + local_path.size(), k.end()) + "/"); + result.emplace(std::string(k.begin() + local_path.size(), k.end()) + "/"); } } @@ -169,18 +164,6 @@ void getDirectChildrenOnDiskImpl( if (!skip_list.contains(filename)) result.emplace(std::move(filename)); } - else - { - /// Subdirectories. - chassert(path.find(storage_key_perfix) == 0); - auto it = remote_to_local_subdir.find(path.substr(storage_key_perfix.size(), slash_pos - storage_key_perfix.size())); - /// Mapped subdirectories. - if (it != remote_to_local_subdir.end()) - result.emplace(it->second); - /// The remote subdirectory name is the same as the local subdirectory. - else - result.emplace(path.substr(child_pos, slash_pos - child_pos)); - } } } @@ -243,7 +226,7 @@ std::vector MetadataStorageFromPlainRewritableObjectStorage::listDi object_storage->listObjects(abs_key, files, 0); std::unordered_set directories; - getDirectChildrenOnDisk(abs_key, object_storage->getCommonKeyPrefix(), files, std::filesystem::path(path) / "", directories); + getDirectChildrenOnDisk(abs_key, files, std::filesystem::path(path) / "", directories); /// List empty directories that are identified by the `prefix.path` metadata files. This is required to, e.g., remove /// metadata along with regular files. 
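// With the per-remote-path bookkeeping gone, direct child directories are now derived straight from the
// in-memory map: scan the keys sharing the directory prefix and keep those exactly one level deep.
// A simplified, self-contained sketch (a lexicographically ordered std::map stands in for the real map
// and its comparator; names are illustrative):

#include <algorithm>
#include <map>
#include <string>
#include <unordered_set>

std::unordered_set<std::string> directSubdirectories(
    const std::map<std::string, std::string> & local_to_remote, const std::string & dir /* ends with '/' */)
{
    std::unordered_set<std::string> result;
    for (auto it = local_to_remote.lower_bound(dir); it != local_to_remote.end(); ++it)
    {
        const std::string & k = it->first;
        if (k.compare(0, dir.size(), dir) != 0)
            break;                                           // past the keys sharing the prefix
        if (std::count(k.begin() + dir.size(), k.end(), '/') == 0)
            result.insert(k.substr(dir.size()) + "/");       // e.g. "store/tbl/part_1" -> "part_1/"
    }
    return result;
}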
if (object_storage->getCommonKeyPrefix() != getMetadataKeyPrefix()) @@ -251,7 +234,7 @@ std::vector MetadataStorageFromPlainRewritableObjectStorage::listDi auto metadata_key = std::filesystem::path(getMetadataKeyPrefix()) / key_prefix / ""; RelativePathsWithMetadata metadata_files; object_storage->listObjects(metadata_key, metadata_files, 0); - getDirectChildrenOnDisk(metadata_key, getMetadataKeyPrefix(), metadata_files, path, directories); + getDirectChildrenOnDisk(metadata_key, metadata_files, std::filesystem::path(path) / "", directories); } return std::vector(std::make_move_iterator(directories.begin()), std::make_move_iterator(directories.end())); @@ -259,12 +242,11 @@ std::vector MetadataStorageFromPlainRewritableObjectStorage::listDi void MetadataStorageFromPlainRewritableObjectStorage::getDirectChildrenOnDisk( const std::string & storage_key, - const std::string & storage_key_perfix, const RelativePathsWithMetadata & remote_paths, const std::string & local_path, std::unordered_set & result) const { - getDirectChildrenOnDiskImpl(storage_key, storage_key_perfix, remote_paths, local_path, getPathMap()->map, getPathMap()->mutex, result); + getDirectChildrenOnDiskImpl(storage_key, remote_paths, local_path, getPathMap()->map, getPathMap()->mutex, result); } } diff --git a/src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.h b/src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.h index fea461abab8..8fd147e15b9 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.h +++ b/src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.h @@ -30,7 +30,6 @@ protected: std::shared_ptr getPathMap() const override { return path_map; } void getDirectChildrenOnDisk( const std::string & storage_key, - const std::string & storage_key_perfix, const RelativePathsWithMetadata & remote_paths, const std::string & local_path, std::unordered_set & result) const; From 82f5aceb484a322960065f973bec2b61c31219aa Mon Sep 17 00:00:00 2001 From: Julia Kartseva Date: Thu, 11 Jul 2024 06:22:26 +0000 Subject: [PATCH 576/661] introduce flat structure --- .../FlatStructureKeyGenerator.cpp | 51 +++++++++++++++++++ .../FlatStructureKeyGenerator.h | 23 +++++++++ ...torageFromPlainRewritableObjectStorage.cpp | 15 ++++-- 3 files changed, 86 insertions(+), 3 deletions(-) create mode 100644 src/Disks/ObjectStorages/FlatStructureKeyGenerator.cpp create mode 100644 src/Disks/ObjectStorages/FlatStructureKeyGenerator.h diff --git a/src/Disks/ObjectStorages/FlatStructureKeyGenerator.cpp b/src/Disks/ObjectStorages/FlatStructureKeyGenerator.cpp new file mode 100644 index 00000000000..d6fb32b65d4 --- /dev/null +++ b/src/Disks/ObjectStorages/FlatStructureKeyGenerator.cpp @@ -0,0 +1,51 @@ +#include "FlatStructureKeyGenerator.h" +#include "Common/ObjectStorageKey.h" +#include "Common/SharedMutex.h" +#include "Disks/ObjectStorages/PathComparator.h" +#include + +#include +#include +#include + +namespace DB +{ + +FlatStructureKeyGenerator::FlatStructureKeyGenerator(String storage_key_prefix_, std::weak_ptr path_map_) + : storage_key_prefix(storage_key_prefix_), path_map(std::move(path_map_)) +{ +} + +ObjectStorageKey FlatStructureKeyGenerator::generate(const String & path, bool is_directory, const std::optional & key_prefix) const +{ + if (is_directory) + chassert(path.ends_with('/')); + + const auto p = std::filesystem::path(path); + auto directory = p.parent_path(); + + constexpr size_t part_size = 32; + + std::optional remote_path; + { + auto ptr = 
path_map.lock(); + std::shared_lock lock(ptr->mutex); + auto it = ptr->map.find(p); + if (it != ptr->map.end()) + return ObjectStorageKey::createAsRelative(key_prefix.has_value() ? *key_prefix : storage_key_prefix, it->second); + + it = ptr->map.find(directory); + if (it != ptr->map.end()) + remote_path = it->second; + } + std::filesystem::path key = remote_path.has_value() ? *remote_path + : is_directory ? std::filesystem::path(getRandomASCIIString(part_size)) + : directory; + + if (!is_directory) + key /= p.filename(); + + return ObjectStorageKey::createAsRelative(key_prefix.has_value() ? *key_prefix : storage_key_prefix, key); +} + +} diff --git a/src/Disks/ObjectStorages/FlatStructureKeyGenerator.h b/src/Disks/ObjectStorages/FlatStructureKeyGenerator.h new file mode 100644 index 00000000000..2c585dffb81 --- /dev/null +++ b/src/Disks/ObjectStorages/FlatStructureKeyGenerator.h @@ -0,0 +1,23 @@ +#pragma once + +#include +#include + +#include +namespace DB +{ + +class FlatStructureKeyGenerator : public IObjectStorageKeysGenerator +{ +public: + explicit FlatStructureKeyGenerator(String storage_key_prefix_, std::weak_ptr path_map_); + + ObjectStorageKey generate(const String & path, bool is_directory, const std::optional & key_prefix) const override; + +private: + const String storage_key_prefix; + + std::weak_ptr path_map; +}; + +} diff --git a/src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.cpp b/src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.cpp index 6a0eff0a136..afaa7bf06ff 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.cpp +++ b/src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.cpp @@ -1,5 +1,7 @@ #include +#include #include +#include #include #include @@ -8,7 +10,6 @@ #include #include #include "CommonPathPrefixKeyGenerator.h" -#include "Disks/ObjectStorages/PathComparator.h" namespace DB @@ -181,8 +182,16 @@ MetadataStorageFromPlainRewritableObjectStorage::MetadataStorageFromPlainRewrita "MetadataStorageFromPlainRewritableObjectStorage is not compatible with write-once storage '{}'", object_storage->getName()); - auto keys_gen = std::make_shared(object_storage->getCommonKeyPrefix(), path_map); - object_storage->setKeysGenerator(keys_gen); + if (getMetadataKeyPrefix() == object_storage->getCommonKeyPrefix()) + { + auto keys_gen = std::make_shared(object_storage->getCommonKeyPrefix(), path_map); + object_storage->setKeysGenerator(keys_gen); + } + else + { + auto keys_gen = std::make_shared(object_storage->getCommonKeyPrefix(), path_map); + object_storage->setKeysGenerator(keys_gen); + } } MetadataStorageFromPlainRewritableObjectStorage::~MetadataStorageFromPlainRewritableObjectStorage() From c0e6780dfe5977316e50e587877f3fe6ef11d048 Mon Sep 17 00:00:00 2001 From: Julia Kartseva Date: Thu, 11 Jul 2024 22:48:07 +0000 Subject: [PATCH 577/661] rename PathComparator.h -> InMemoryPathMap.h --- src/Disks/ObjectStorages/CommonPathPrefixKeyGenerator.cpp | 4 ++-- src/Disks/ObjectStorages/CommonPathPrefixKeyGenerator.h | 5 ++--- src/Disks/ObjectStorages/FlatStructureKeyGenerator.cpp | 2 +- src/Disks/ObjectStorages/FlatStructureKeyGenerator.h | 2 +- .../ObjectStorages/{PathComparator.h => InMemoryPathMap.h} | 0 .../ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp | 1 + .../ObjectStorages/MetadataStorageFromPlainObjectStorage.h | 3 ++- .../MetadataStorageFromPlainObjectStorageOperations.cpp | 2 +- .../MetadataStorageFromPlainObjectStorageOperations.h | 2 +- 
.../MetadataStorageFromPlainRewritableObjectStorage.cpp | 4 ++-- 10 files changed, 13 insertions(+), 12 deletions(-) rename src/Disks/ObjectStorages/{PathComparator.h => InMemoryPathMap.h} (100%) diff --git a/src/Disks/ObjectStorages/CommonPathPrefixKeyGenerator.cpp b/src/Disks/ObjectStorages/CommonPathPrefixKeyGenerator.cpp index 19dd819fc17..1fa06823bae 100644 --- a/src/Disks/ObjectStorages/CommonPathPrefixKeyGenerator.cpp +++ b/src/Disks/ObjectStorages/CommonPathPrefixKeyGenerator.cpp @@ -1,5 +1,5 @@ -#include "CommonPathPrefixKeyGenerator.h" -#include "Disks/ObjectStorages/PathComparator.h" +#include +#include #include diff --git a/src/Disks/ObjectStorages/CommonPathPrefixKeyGenerator.h b/src/Disks/ObjectStorages/CommonPathPrefixKeyGenerator.h index e337745b627..8b5037e3804 100644 --- a/src/Disks/ObjectStorages/CommonPathPrefixKeyGenerator.h +++ b/src/Disks/ObjectStorages/CommonPathPrefixKeyGenerator.h @@ -1,9 +1,6 @@ #pragma once #include -#include - -#include #include #include @@ -21,6 +18,8 @@ namespace DB /// /// The key generator ensures that the original directory hierarchy is /// preserved, which is required for the MergeTree family. + +struct InMemoryPathMap; class CommonPathPrefixKeyGenerator : public IObjectStorageKeysGenerator { public: diff --git a/src/Disks/ObjectStorages/FlatStructureKeyGenerator.cpp b/src/Disks/ObjectStorages/FlatStructureKeyGenerator.cpp index d6fb32b65d4..414aea2b08b 100644 --- a/src/Disks/ObjectStorages/FlatStructureKeyGenerator.cpp +++ b/src/Disks/ObjectStorages/FlatStructureKeyGenerator.cpp @@ -1,7 +1,7 @@ #include "FlatStructureKeyGenerator.h" +#include #include "Common/ObjectStorageKey.h" #include "Common/SharedMutex.h" -#include "Disks/ObjectStorages/PathComparator.h" #include #include diff --git a/src/Disks/ObjectStorages/FlatStructureKeyGenerator.h b/src/Disks/ObjectStorages/FlatStructureKeyGenerator.h index 2c585dffb81..6b5b2203bed 100644 --- a/src/Disks/ObjectStorages/FlatStructureKeyGenerator.h +++ b/src/Disks/ObjectStorages/FlatStructureKeyGenerator.h @@ -1,12 +1,12 @@ #pragma once -#include #include #include namespace DB { +struct InMemoryPathMap; class FlatStructureKeyGenerator : public IObjectStorageKeysGenerator { public: diff --git a/src/Disks/ObjectStorages/PathComparator.h b/src/Disks/ObjectStorages/InMemoryPathMap.h similarity index 100% rename from src/Disks/ObjectStorages/PathComparator.h rename to src/Disks/ObjectStorages/InMemoryPathMap.h diff --git a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp index 7553c7733b5..364d04e2b52 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp +++ b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp @@ -1,5 +1,6 @@ #include "MetadataStorageFromPlainObjectStorage.h" #include +#include #include #include diff --git a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.h b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.h index dfb9632666c..a9a1a648f96 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.h +++ b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.h @@ -2,9 +2,9 @@ #include #include +#include #include #include -#include #include #include @@ -13,6 +13,7 @@ namespace DB { +struct InMemoryPathMap; struct UnlinkMetadataFileOperationOutcome; using UnlinkMetadataFileOperationOutcomePtr = std::shared_ptr; diff --git a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.cpp 
b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.cpp index b4a85efbaab..6f5109faec4 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.cpp +++ b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.cpp @@ -1,5 +1,5 @@ #include "MetadataStorageFromPlainObjectStorageOperations.h" -#include "Disks/ObjectStorages/PathComparator.h" +#include #include #include diff --git a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.h b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.h index 1b2471dd316..778585fa758 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.h +++ b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.h @@ -1,8 +1,8 @@ #pragma once #include +#include #include -#include "Disks/ObjectStorages/PathComparator.h" #include #include diff --git a/src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.cpp b/src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.cpp index afaa7bf06ff..c312eae4077 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.cpp +++ b/src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.cpp @@ -1,7 +1,7 @@ -#include #include +#include +#include #include -#include #include #include From 3f066018fb0c74783ee486f54fc472ffd9cd7cc1 Mon Sep 17 00:00:00 2001 From: Julia Kartseva Date: Fri, 12 Jul 2024 02:27:45 +0000 Subject: [PATCH 578/661] style and doc --- src/Common/ObjectStorageKeyGenerator.cpp | 1 - src/Common/ObjectStorageKeyGenerator.h | 4 ++++ .../CommonPathPrefixKeyGenerator.h | 1 + ...=> FlatDirectoryStructureKeyGenerator.cpp} | 9 ++++--- ...h => FlatDirectoryStructureKeyGenerator.h} | 4 ++-- .../ObjectStorages/HDFS/HDFSObjectStorage.cpp | 1 - src/Disks/ObjectStorages/InMemoryPathMap.h | 2 +- .../Local/LocalObjectStorage.cpp | 1 - .../MetadataStorageFromPlainObjectStorage.cpp | 2 -- .../MetadataStorageFromPlainObjectStorage.h | 5 ++-- ...torageFromPlainObjectStorageOperations.cpp | 24 +++++++++---------- ...aStorageFromPlainObjectStorageOperations.h | 3 +-- ...torageFromPlainRewritableObjectStorage.cpp | 12 +++++++--- 13 files changed, 35 insertions(+), 34 deletions(-) rename src/Disks/ObjectStorages/{FlatStructureKeyGenerator.cpp => FlatDirectoryStructureKeyGenerator.cpp} (80%) rename src/Disks/ObjectStorages/{FlatStructureKeyGenerator.h => FlatDirectoryStructureKeyGenerator.h} (64%) diff --git a/src/Common/ObjectStorageKeyGenerator.cpp b/src/Common/ObjectStorageKeyGenerator.cpp index 3e7bf3116bd..3bdc0004198 100644 --- a/src/Common/ObjectStorageKeyGenerator.cpp +++ b/src/Common/ObjectStorageKeyGenerator.cpp @@ -3,7 +3,6 @@ #include #include -#include #include diff --git a/src/Common/ObjectStorageKeyGenerator.h b/src/Common/ObjectStorageKeyGenerator.h index 12aeec1714d..008e3c88fac 100644 --- a/src/Common/ObjectStorageKeyGenerator.h +++ b/src/Common/ObjectStorageKeyGenerator.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include "ObjectStorageKey.h" namespace DB @@ -12,6 +13,9 @@ public: virtual ~IObjectStorageKeysGenerator() = default; /// Generates an object storage key based on a path in the virtual filesystem. + /// @param path - Path in the virtual filesystem. + /// @param is_directory - If the path in the virtual filesystem corresponds to a directory. + /// @param key_prefix - Optional key prefix for the generated object storage key. 
If provided, this prefix will be added to the beginning of the generated key. virtual ObjectStorageKey generate(const String & path, bool is_directory, const std::optional & key_prefix) const = 0; }; diff --git a/src/Disks/ObjectStorages/CommonPathPrefixKeyGenerator.h b/src/Disks/ObjectStorages/CommonPathPrefixKeyGenerator.h index 8b5037e3804..ea91d78600d 100644 --- a/src/Disks/ObjectStorages/CommonPathPrefixKeyGenerator.h +++ b/src/Disks/ObjectStorages/CommonPathPrefixKeyGenerator.h @@ -9,6 +9,7 @@ namespace DB { +/// Deprecated. Used for backward compatibility with plain rewritable disks without a separate metadata layout. /// Object storage key generator used specifically with the /// MetadataStorageFromPlainObjectStorage if multiple writes are allowed. diff --git a/src/Disks/ObjectStorages/FlatStructureKeyGenerator.cpp b/src/Disks/ObjectStorages/FlatDirectoryStructureKeyGenerator.cpp similarity index 80% rename from src/Disks/ObjectStorages/FlatStructureKeyGenerator.cpp rename to src/Disks/ObjectStorages/FlatDirectoryStructureKeyGenerator.cpp index 414aea2b08b..64959b729b6 100644 --- a/src/Disks/ObjectStorages/FlatStructureKeyGenerator.cpp +++ b/src/Disks/ObjectStorages/FlatDirectoryStructureKeyGenerator.cpp @@ -1,4 +1,4 @@ -#include "FlatStructureKeyGenerator.h" +#include "FlatDirectoryStructureKeyGenerator.h" #include #include "Common/ObjectStorageKey.h" #include "Common/SharedMutex.h" @@ -11,12 +11,12 @@ namespace DB { -FlatStructureKeyGenerator::FlatStructureKeyGenerator(String storage_key_prefix_, std::weak_ptr path_map_) +FlatDirectoryStructureKeyGenerator::FlatDirectoryStructureKeyGenerator(String storage_key_prefix_, std::weak_ptr path_map_) : storage_key_prefix(storage_key_prefix_), path_map(std::move(path_map_)) { } -ObjectStorageKey FlatStructureKeyGenerator::generate(const String & path, bool is_directory, const std::optional & key_prefix) const +ObjectStorageKey FlatDirectoryStructureKeyGenerator::generate(const String & path, bool is_directory, const std::optional & key_prefix) const { if (is_directory) chassert(path.ends_with('/')); @@ -24,8 +24,6 @@ ObjectStorageKey FlatStructureKeyGenerator::generate(const String & path, bool i const auto p = std::filesystem::path(path); auto directory = p.parent_path(); - constexpr size_t part_size = 32; - std::optional remote_path; { auto ptr = path_map.lock(); @@ -38,6 +36,7 @@ ObjectStorageKey FlatStructureKeyGenerator::generate(const String & path, bool i if (it != ptr->map.end()) remote_path = it->second; } + constexpr size_t part_size = 32; std::filesystem::path key = remote_path.has_value() ? *remote_path : is_directory ? 
std::filesystem::path(getRandomASCIIString(part_size)) : directory; diff --git a/src/Disks/ObjectStorages/FlatStructureKeyGenerator.h b/src/Disks/ObjectStorages/FlatDirectoryStructureKeyGenerator.h similarity index 64% rename from src/Disks/ObjectStorages/FlatStructureKeyGenerator.h rename to src/Disks/ObjectStorages/FlatDirectoryStructureKeyGenerator.h index 6b5b2203bed..4dbac5d3003 100644 --- a/src/Disks/ObjectStorages/FlatStructureKeyGenerator.h +++ b/src/Disks/ObjectStorages/FlatDirectoryStructureKeyGenerator.h @@ -7,10 +7,10 @@ namespace DB { struct InMemoryPathMap; -class FlatStructureKeyGenerator : public IObjectStorageKeysGenerator +class FlatDirectoryStructureKeyGenerator : public IObjectStorageKeysGenerator { public: - explicit FlatStructureKeyGenerator(String storage_key_prefix_, std::weak_ptr path_map_); + explicit FlatDirectoryStructureKeyGenerator(String storage_key_prefix_, std::weak_ptr path_map_); ObjectStorageKey generate(const String & path, bool is_directory, const std::optional & key_prefix) const override; diff --git a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp index 3ce2a0f4903..00ef4b63e6f 100644 --- a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp +++ b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp @@ -4,7 +4,6 @@ #include #include -#include #include #include #include diff --git a/src/Disks/ObjectStorages/InMemoryPathMap.h b/src/Disks/ObjectStorages/InMemoryPathMap.h index fae82108789..ea08784719e 100644 --- a/src/Disks/ObjectStorages/InMemoryPathMap.h +++ b/src/Disks/ObjectStorages/InMemoryPathMap.h @@ -2,7 +2,7 @@ #include #include -#include "Common/SharedMutex.h" +#include namespace DB { diff --git a/src/Disks/ObjectStorages/Local/LocalObjectStorage.cpp b/src/Disks/ObjectStorages/Local/LocalObjectStorage.cpp index 20ef135cdf7..5b61c57ca21 100644 --- a/src/Disks/ObjectStorages/Local/LocalObjectStorage.cpp +++ b/src/Disks/ObjectStorages/Local/LocalObjectStorage.cpp @@ -1,7 +1,6 @@ #include #include -#include #include #include #include diff --git a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp index 364d04e2b52..2036208c389 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp +++ b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp @@ -146,10 +146,8 @@ void MetadataStorageFromPlainObjectStorageTransaction::createDirectory(const std return; auto normalized_path = normalizeDirectoryPath(path); - auto key_prefix = object_storage->generateObjectKeyPrefixForDirectoryPath(normalized_path, "" /* key_prefix */).serialize(); auto op = std::make_unique( std::move(normalized_path), - std::move(key_prefix), *metadata_storage.getPathMap(), object_storage, metadata_storage.getMetadataKeyPrefix()); diff --git a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.h b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.h index a9a1a648f96..2aac7158bd5 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.h +++ b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.h @@ -78,11 +78,10 @@ public: bool supportsStat() const override { return false; } protected: - /// Get the object storage prefix for storing metadata files. If stored behind a separate endpoint, - /// the metadata keys reflect the layout of the regular files. + /// Get the object storage prefix for storing metadata files. 
virtual std::string getMetadataKeyPrefix() const { return object_storage->getCommonKeyPrefix(); } - /// Returns a map of local paths to paths in object storage. + /// Returns a map of virtual filesystem paths to paths in the object storage. virtual std::shared_ptr getPathMap() const { throwNotImplemented(); } }; diff --git a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.cpp b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.cpp index 6f5109faec4..9e18f6cdb08 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.cpp +++ b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.cpp @@ -29,25 +29,21 @@ ObjectStorageKey createMetadataObjectKey(const std::string & key_prefix, const s } MetadataStorageFromPlainObjectStorageCreateDirectoryOperation::MetadataStorageFromPlainObjectStorageCreateDirectoryOperation( - std::filesystem::path && path_, - std::string && key_prefix_, - InMemoryPathMap & path_map_, - ObjectStoragePtr object_storage_, - const std::string & metadata_key_prefix_) + std::filesystem::path && path_, InMemoryPathMap & path_map_, ObjectStoragePtr object_storage_, const std::string & metadata_key_prefix_) : path(std::move(path_)) - , key_prefix(key_prefix_) , path_map(path_map_) , object_storage(object_storage_) , metadata_key_prefix(metadata_key_prefix_) + , key_prefix(object_storage->generateObjectKeyPrefixForDirectoryPath(path, "" /* key_prefix */).serialize()) { } void MetadataStorageFromPlainObjectStorageCreateDirectoryOperation::execute(std::unique_lock &) { - auto & map = path_map.map; auto & mutex = path_map.mutex; { std::shared_lock lock(mutex); + auto & map = path_map.map; if (map.contains(path.parent_path())) return; } @@ -72,6 +68,7 @@ void MetadataStorageFromPlainObjectStorageCreateDirectoryOperation::execute(std: { std::unique_lock lock(mutex); + auto & map = path_map.map; [[maybe_unused]] auto result = map.emplace(path.parent_path(), std::move(key_prefix)); chassert(result.second); } @@ -89,7 +86,6 @@ void MetadataStorageFromPlainObjectStorageCreateDirectoryOperation::execute(std: void MetadataStorageFromPlainObjectStorageCreateDirectoryOperation::undo(std::unique_lock &) { - auto & map = path_map.map; auto & mutex = path_map.mutex; auto metadata_object_key = createMetadataObjectKey(key_prefix, metadata_key_prefix); @@ -98,6 +94,7 @@ void MetadataStorageFromPlainObjectStorageCreateDirectoryOperation::undo(std::un { { std::unique_lock lock(mutex); + auto & map = path_map.map; map.erase(path.parent_path()); } auto metric = object_storage->getMetadataStorageMetrics().directory_map_size; @@ -126,12 +123,12 @@ MetadataStorageFromPlainObjectStorageMoveDirectoryOperation::MetadataStorageFrom std::unique_ptr MetadataStorageFromPlainObjectStorageMoveDirectoryOperation::createWriteBuf( const std::filesystem::path & expected_path, const std::filesystem::path & new_path, bool validate_content) { - auto & map = path_map.map; auto & mutex = path_map.mutex; std::filesystem::path remote_path; { std::shared_lock lock(mutex); + auto & map = path_map.map; auto expected_it = map.find(expected_path.parent_path()); if (expected_it == map.end()) throw Exception( @@ -182,10 +179,10 @@ void MetadataStorageFromPlainObjectStorageMoveDirectoryOperation::execute(std::u writeString(path_to.string(), *write_buf); write_buf->finalize(); - auto & map = path_map.map; auto & mutex = path_map.mutex; { std::unique_lock lock(mutex); + auto & map = path_map.map; [[maybe_unused]] auto result = 
map.emplace(path_to.parent_path(), map.extract(path_from.parent_path()).mapped()); chassert(result.second); } @@ -197,9 +194,9 @@ void MetadataStorageFromPlainObjectStorageMoveDirectoryOperation::undo(std::uniq { if (write_finalized) { - auto & map = path_map.map; auto & mutex = path_map.mutex; std::unique_lock lock(mutex); + auto & map = path_map.map; map.emplace(path_from.parent_path(), map.extract(path_to.parent_path()).mapped()); } @@ -219,10 +216,10 @@ MetadataStorageFromPlainObjectStorageRemoveDirectoryOperation::MetadataStorageFr void MetadataStorageFromPlainObjectStorageRemoveDirectoryOperation::execute(std::unique_lock & /* metadata_lock */) { - auto & map = path_map.map; auto & mutex = path_map.mutex; { std::shared_lock lock(mutex); + auto & map = path_map.map; auto path_it = map.find(path.parent_path()); if (path_it == map.end()) return; @@ -237,6 +234,7 @@ void MetadataStorageFromPlainObjectStorageRemoveDirectoryOperation::execute(std: { std::unique_lock lock(mutex); + auto & map = path_map.map; map.erase(path.parent_path()); } @@ -265,10 +263,10 @@ void MetadataStorageFromPlainObjectStorageRemoveDirectoryOperation::undo(std::un writeString(path.string(), *buf); buf->finalize(); - auto & map = path_map.map; auto & mutex = path_map.mutex; { std::unique_lock lock(mutex); + auto & map = path_map.map; map.emplace(path.parent_path(), std::move(key_prefix)); } auto metric = object_storage->getMetadataStorageMetrics().directory_map_size; diff --git a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.h b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.h index 778585fa758..3ac0ffef8d2 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.h +++ b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.h @@ -14,10 +14,10 @@ class MetadataStorageFromPlainObjectStorageCreateDirectoryOperation final : publ { private: std::filesystem::path path; - std::string key_prefix; InMemoryPathMap & path_map; ObjectStoragePtr object_storage; const std::string metadata_key_prefix; + const std::string key_prefix; bool write_created = false; bool write_finalized = false; @@ -26,7 +26,6 @@ public: // Assuming that paths are normalized. MetadataStorageFromPlainObjectStorageCreateDirectoryOperation( std::filesystem::path && path_, - std::string && key_prefix_, InMemoryPathMap & path_map_, ObjectStoragePtr object_storage_, const std::string & metadata_key_prefix_); diff --git a/src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.cpp b/src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.cpp index c312eae4077..fd3b9523df6 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.cpp +++ b/src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.cpp @@ -1,4 +1,4 @@ -#include +#include #include #include #include @@ -132,16 +132,20 @@ void getDirectChildrenOnDiskImpl( SharedMutex & shared_mutex, std::unordered_set & result) { + /// Directories are retrieved from the in-memory path map. 
{ std::shared_lock lock(shared_mutex); auto end_it = local_path_prefixes.end(); for (auto it = local_path_prefixes.lower_bound(local_path); it != end_it; ++it) { - const auto & [k, v] = std::make_tuple(it->first.string(), it->second); + const auto & [k, _] = std::make_tuple(it->first.string(), it->second); if (!k.starts_with(local_path)) break; auto slash_num = count(k.begin() + local_path.size(), k.end(), '/'); + /// The local_path_prefixes comparator ensures that the paths with the smallest number of + /// hops from the local_path are iterated first. The paths do not end with '/', hence + /// break the loop if the number of slashes is greater than 0. if (slash_num != 0) break; @@ -149,6 +153,7 @@ void getDirectChildrenOnDiskImpl( } } + /// Files. auto skip_list = std::set{PREFIX_PATH_FILE_NAME}; for (const auto & elem : remote_paths) { @@ -189,7 +194,8 @@ MetadataStorageFromPlainRewritableObjectStorage::MetadataStorageFromPlainRewrita } else { - auto keys_gen = std::make_shared(object_storage->getCommonKeyPrefix(), path_map); + /// Use flat directory structure if the metadata is stored separately from the table data. + auto keys_gen = std::make_shared(object_storage->getCommonKeyPrefix(), path_map); object_storage->setKeysGenerator(keys_gen); } } From d4c13714abb6b307c4344c74bd4b7973c03e68df Mon Sep 17 00:00:00 2001 From: Julia Kartseva Date: Sat, 13 Jul 2024 06:51:02 +0000 Subject: [PATCH 579/661] address feedback: TSA_GUARDED_BY --- .../CommonPathPrefixKeyGenerator.cpp | 5 +-- .../FlatDirectoryStructureKeyGenerator.cpp | 7 ++-- src/Disks/ObjectStorages/InMemoryPathMap.h | 4 +-- ...torageFromPlainObjectStorageOperations.cpp | 34 +++++++------------ ...torageFromPlainRewritableObjectStorage.cpp | 34 +++++++++++-------- 5 files changed, 40 insertions(+), 44 deletions(-) diff --git a/src/Disks/ObjectStorages/CommonPathPrefixKeyGenerator.cpp b/src/Disks/ObjectStorages/CommonPathPrefixKeyGenerator.cpp index 1fa06823bae..1d041626a7e 100644 --- a/src/Disks/ObjectStorages/CommonPathPrefixKeyGenerator.cpp +++ b/src/Disks/ObjectStorages/CommonPathPrefixKeyGenerator.cpp @@ -2,6 +2,7 @@ #include #include +#include #include #include @@ -49,8 +50,8 @@ std::tuple> CommonPathPrefixKeyGenerator:: std::filesystem::path p(path); std::deque dq; - auto ptr = path_map.lock(); - std::shared_lock lock(ptr->mutex); + const auto ptr = path_map.lock(); + SharedLockGuard lock(ptr->mutex); while (p != p.root_path()) { diff --git a/src/Disks/ObjectStorages/FlatDirectoryStructureKeyGenerator.cpp b/src/Disks/ObjectStorages/FlatDirectoryStructureKeyGenerator.cpp index 64959b729b6..0f35bfd2427 100644 --- a/src/Disks/ObjectStorages/FlatDirectoryStructureKeyGenerator.cpp +++ b/src/Disks/ObjectStorages/FlatDirectoryStructureKeyGenerator.cpp @@ -1,7 +1,8 @@ #include "FlatDirectoryStructureKeyGenerator.h" #include #include "Common/ObjectStorageKey.h" -#include "Common/SharedMutex.h" +#include +#include #include #include @@ -26,8 +27,8 @@ ObjectStorageKey FlatDirectoryStructureKeyGenerator::generate(const String & pat std::optional remote_path; { - auto ptr = path_map.lock(); - std::shared_lock lock(ptr->mutex); + const auto ptr = path_map.lock(); + SharedLockGuard lock(ptr->mutex); auto it = ptr->map.find(p); if (it != ptr->map.end()) return ObjectStorageKey::createAsRelative(key_prefix.has_value() ? 
*key_prefix : storage_key_prefix, it->second); diff --git a/src/Disks/ObjectStorages/InMemoryPathMap.h b/src/Disks/ObjectStorages/InMemoryPathMap.h index ea08784719e..dcd28dfaf6c 100644 --- a/src/Disks/ObjectStorages/InMemoryPathMap.h +++ b/src/Disks/ObjectStorages/InMemoryPathMap.h @@ -22,8 +22,8 @@ struct InMemoryPathMap } }; using Map = std::map; - Map map; - SharedMutex mutex; + mutable SharedMutex mutex; + Map map TSA_GUARDED_BY(mutex); }; } diff --git a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.cpp b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.cpp index 9e18f6cdb08..8a06b204cfc 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.cpp +++ b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.cpp @@ -1,4 +1,5 @@ #include "MetadataStorageFromPlainObjectStorageOperations.h" +#include "Common/SharedLockGuard.h" #include #include @@ -40,11 +41,9 @@ MetadataStorageFromPlainObjectStorageCreateDirectoryOperation::MetadataStorageFr void MetadataStorageFromPlainObjectStorageCreateDirectoryOperation::execute(std::unique_lock &) { - auto & mutex = path_map.mutex; { - std::shared_lock lock(mutex); - auto & map = path_map.map; - if (map.contains(path.parent_path())) + SharedLockGuard lock(path_map.mutex); + if (path_map.map.contains(path.parent_path())) return; } @@ -67,7 +66,7 @@ void MetadataStorageFromPlainObjectStorageCreateDirectoryOperation::execute(std: write_created = true; { - std::unique_lock lock(mutex); + std::lock_guard lock(path_map.mutex); auto & map = path_map.map; [[maybe_unused]] auto result = map.emplace(path.parent_path(), std::move(key_prefix)); chassert(result.second); @@ -86,16 +85,13 @@ void MetadataStorageFromPlainObjectStorageCreateDirectoryOperation::execute(std: void MetadataStorageFromPlainObjectStorageCreateDirectoryOperation::undo(std::unique_lock &) { - auto & mutex = path_map.mutex; - auto metadata_object_key = createMetadataObjectKey(key_prefix, metadata_key_prefix); if (write_finalized) { { - std::unique_lock lock(mutex); - auto & map = path_map.map; - map.erase(path.parent_path()); + std::lock_guard lock(path_map.mutex); + path_map.map.erase(path.parent_path()); } auto metric = object_storage->getMetadataStorageMetrics().directory_map_size; CurrentMetrics::sub(metric, 1); @@ -123,11 +119,9 @@ MetadataStorageFromPlainObjectStorageMoveDirectoryOperation::MetadataStorageFrom std::unique_ptr MetadataStorageFromPlainObjectStorageMoveDirectoryOperation::createWriteBuf( const std::filesystem::path & expected_path, const std::filesystem::path & new_path, bool validate_content) { - auto & mutex = path_map.mutex; - std::filesystem::path remote_path; { - std::shared_lock lock(mutex); + SharedLockGuard lock(path_map.mutex); auto & map = path_map.map; auto expected_it = map.find(expected_path.parent_path()); if (expected_it == map.end()) @@ -179,9 +173,8 @@ void MetadataStorageFromPlainObjectStorageMoveDirectoryOperation::execute(std::u writeString(path_to.string(), *write_buf); write_buf->finalize(); - auto & mutex = path_map.mutex; { - std::unique_lock lock(mutex); + std::lock_guard lock(path_map.mutex); auto & map = path_map.map; [[maybe_unused]] auto result = map.emplace(path_to.parent_path(), map.extract(path_from.parent_path()).mapped()); chassert(result.second); @@ -194,8 +187,7 @@ void MetadataStorageFromPlainObjectStorageMoveDirectoryOperation::undo(std::uniq { if (write_finalized) { - auto & mutex = path_map.mutex; - std::unique_lock lock(mutex); + 
std::lock_guard lock(path_map.mutex); auto & map = path_map.map; map.emplace(path_from.parent_path(), map.extract(path_to.parent_path()).mapped()); } @@ -216,9 +208,8 @@ MetadataStorageFromPlainObjectStorageRemoveDirectoryOperation::MetadataStorageFr void MetadataStorageFromPlainObjectStorageRemoveDirectoryOperation::execute(std::unique_lock & /* metadata_lock */) { - auto & mutex = path_map.mutex; { - std::shared_lock lock(mutex); + SharedLockGuard lock(path_map.mutex); auto & map = path_map.map; auto path_it = map.find(path.parent_path()); if (path_it == map.end()) @@ -233,7 +224,7 @@ void MetadataStorageFromPlainObjectStorageRemoveDirectoryOperation::execute(std: object_storage->removeObject(metadata_object); { - std::unique_lock lock(mutex); + std::lock_guard lock(path_map.mutex); auto & map = path_map.map; map.erase(path.parent_path()); } @@ -263,9 +254,8 @@ void MetadataStorageFromPlainObjectStorageRemoveDirectoryOperation::undo(std::un writeString(path.string(), *buf); buf->finalize(); - auto & mutex = path_map.mutex; { - std::unique_lock lock(mutex); + std::lock_guard lock(path_map.mutex); auto & map = path_map.map; map.emplace(path.parent_path(), std::move(key_prefix)); } diff --git a/src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.cpp b/src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.cpp index fd3b9523df6..22e73e36372 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.cpp +++ b/src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.cpp @@ -9,6 +9,8 @@ #include #include #include +#include "Common/SharedLockGuard.h" +#include "Common/SharedMutex.h" #include "CommonPathPrefixKeyGenerator.h" @@ -39,14 +41,13 @@ std::string getMetadataKeyPrefix(ObjectStoragePtr object_storage) : metadata_key_prefix; } -InMemoryPathMap::Map loadPathPrefixMap(const std::string & metadata_key_prefix, ObjectStoragePtr object_storage) +std::shared_ptr loadPathPrefixMap(const std::string & metadata_key_prefix, ObjectStoragePtr object_storage) { + auto result = std::make_shared(); using Map = InMemoryPathMap::Map; - Map result; ThreadPool & pool = getIOThreadPool().get(); ThreadPoolCallbackRunnerLocal runner(pool, "PlainRWMetaLoad"); - std::mutex mutex; LoggerPtr log = getLogger("MetadataStorageFromPlainObjectStorage"); @@ -66,7 +67,7 @@ InMemoryPathMap::Map loadPathPrefixMap(const std::string & metadata_key_prefix, if (remote_metadata_path.filename() != PREFIX_PATH_FILE_NAME) continue; - runner([remote_metadata_path, path, &object_storage, &result, &mutex, &log, &settings, &metadata_key_prefix] + runner([remote_metadata_path, path, &object_storage, &result, &log, &settings, &metadata_key_prefix] { setThreadName("PlainRWMetaLoad"); @@ -99,8 +100,8 @@ InMemoryPathMap::Map loadPathPrefixMap(const std::string & metadata_key_prefix, auto remote_path = std::filesystem::path(std::move(suffix)); std::pair res; { - std::lock_guard lock(mutex); - res = result.emplace(std::filesystem::path(local_path).parent_path(), remote_path.parent_path()); + std::lock_guard lock(result->mutex); + res = result->map.emplace(std::filesystem::path(local_path).parent_path(), remote_path.parent_path()); } /// This can happen if table replication is enabled, then the same local path is written @@ -117,10 +118,13 @@ InMemoryPathMap::Map loadPathPrefixMap(const std::string & metadata_key_prefix, } runner.waitForAllToFinishAndRethrowFirstError(); - LOG_DEBUG(log, "Loaded metadata for {} files, found {} directories", num_files, 
result.size()); + { + SharedLockGuard lock(result->mutex); + LOG_DEBUG(log, "Loaded metadata for {} files, found {} directories", num_files, result->map.size()); - auto metric = object_storage->getMetadataStorageMetrics().directory_map_size; - CurrentMetrics::add(metric, result.size()); + auto metric = object_storage->getMetadataStorageMetrics().directory_map_size; + CurrentMetrics::add(metric, result->map.size()); + } return result; } @@ -128,14 +132,14 @@ void getDirectChildrenOnDiskImpl( const std::string & storage_key, const RelativePathsWithMetadata & remote_paths, const std::string & local_path, - const InMemoryPathMap::Map & local_path_prefixes, - SharedMutex & shared_mutex, + const InMemoryPathMap & path_map, std::unordered_set & result) { /// Directories are retrieved from the in-memory path map. { - std::shared_lock lock(shared_mutex); - auto end_it = local_path_prefixes.end(); + SharedLockGuard lock(path_map.mutex); + const auto & local_path_prefixes = path_map.map; + const auto end_it = local_path_prefixes.end(); for (auto it = local_path_prefixes.lower_bound(local_path); it != end_it; ++it) { const auto & [k, _] = std::make_tuple(it->first.string(), it->second); @@ -179,7 +183,7 @@ MetadataStorageFromPlainRewritableObjectStorage::MetadataStorageFromPlainRewrita ObjectStoragePtr object_storage_, String storage_path_prefix_) : MetadataStorageFromPlainObjectStorage(object_storage_, storage_path_prefix_) , metadata_key_prefix(DB::getMetadataKeyPrefix(object_storage)) - , path_map(std::make_shared(loadPathPrefixMap(metadata_key_prefix, object_storage))) + , path_map(loadPathPrefixMap(metadata_key_prefix, object_storage)) { if (object_storage->isWriteOnce()) throw Exception( @@ -261,7 +265,7 @@ void MetadataStorageFromPlainRewritableObjectStorage::getDirectChildrenOnDisk( const std::string & local_path, std::unordered_set & result) const { - getDirectChildrenOnDiskImpl(storage_key, remote_paths, local_path, getPathMap()->map, getPathMap()->mutex, result); + getDirectChildrenOnDiskImpl(storage_key, remote_paths, local_path, *getPathMap(), result); } } From db13ba2c488303e90717fbcc5adf5304241ac474 Mon Sep 17 00:00:00 2001 From: Julia Kartseva Date: Sat, 13 Jul 2024 06:51:40 +0000 Subject: [PATCH 580/661] style fix --- .../CommonPathPrefixKeyGenerator.cpp | 2 +- ...torageFromPlainObjectStorageOperations.cpp | 2 +- ...torageFromPlainRewritableObjectStorage.cpp | 91 ++++++++++--------- 3 files changed, 48 insertions(+), 47 deletions(-) diff --git a/src/Disks/ObjectStorages/CommonPathPrefixKeyGenerator.cpp b/src/Disks/ObjectStorages/CommonPathPrefixKeyGenerator.cpp index 1d041626a7e..521d5c037ab 100644 --- a/src/Disks/ObjectStorages/CommonPathPrefixKeyGenerator.cpp +++ b/src/Disks/ObjectStorages/CommonPathPrefixKeyGenerator.cpp @@ -1,8 +1,8 @@ #include #include -#include #include +#include #include #include diff --git a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.cpp b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.cpp index 8a06b204cfc..76090411bb9 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.cpp +++ b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.cpp @@ -1,6 +1,6 @@ #include "MetadataStorageFromPlainObjectStorageOperations.h" -#include "Common/SharedLockGuard.h" #include +#include "Common/SharedLockGuard.h" #include #include diff --git a/src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.cpp 
b/src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.cpp index 22e73e36372..dba63bba321 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.cpp +++ b/src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.cpp @@ -7,10 +7,10 @@ #include #include #include -#include -#include #include "Common/SharedLockGuard.h" #include "Common/SharedMutex.h" +#include +#include #include "CommonPathPrefixKeyGenerator.h" @@ -67,54 +67,55 @@ std::shared_ptr loadPathPrefixMap(const std::string & metadata_ if (remote_metadata_path.filename() != PREFIX_PATH_FILE_NAME) continue; - runner([remote_metadata_path, path, &object_storage, &result, &log, &settings, &metadata_key_prefix] - { - setThreadName("PlainRWMetaLoad"); - - StoredObject object{path}; - String local_path; - - try + runner( + [remote_metadata_path, path, &object_storage, &result, &log, &settings, &metadata_key_prefix] { - auto read_buf = object_storage->readObject(object, settings); - readStringUntilEOF(local_path, *read_buf); - } + setThreadName("PlainRWMetaLoad"); + + StoredObject object{path}; + String local_path; + + try + { + auto read_buf = object_storage->readObject(object, settings); + readStringUntilEOF(local_path, *read_buf); + } #if USE_AWS_S3 - catch (const S3Exception & e) - { - /// It is ok if a directory was removed just now. - /// We support attaching a filesystem that is concurrently modified by someone else. - if (e.getS3ErrorCode() == Aws::S3::S3Errors::NO_SUCH_KEY) - return; - throw; - } + catch (const S3Exception & e) + { + /// It is ok if a directory was removed just now. + /// We support attaching a filesystem that is concurrently modified by someone else. + if (e.getS3ErrorCode() == Aws::S3::S3Errors::NO_SUCH_KEY) + return; + throw; + } #endif - catch (...) - { - throw; - } + catch (...) + { + throw; + } - chassert(remote_metadata_path.has_parent_path()); - chassert(remote_metadata_path.string().starts_with(metadata_key_prefix)); - auto suffix = remote_metadata_path.string().substr(metadata_key_prefix.size()); - auto remote_path = std::filesystem::path(std::move(suffix)); - std::pair res; - { - std::lock_guard lock(result->mutex); - res = result->map.emplace(std::filesystem::path(local_path).parent_path(), remote_path.parent_path()); - } + chassert(remote_metadata_path.has_parent_path()); + chassert(remote_metadata_path.string().starts_with(metadata_key_prefix)); + auto suffix = remote_metadata_path.string().substr(metadata_key_prefix.size()); + auto remote_path = std::filesystem::path(std::move(suffix)); + std::pair res; + { + std::lock_guard lock(result->mutex); + res = result->map.emplace(std::filesystem::path(local_path).parent_path(), remote_path.parent_path()); + } - /// This can happen if table replication is enabled, then the same local path is written - /// in `prefix.path` of each replica. - /// TODO: should replicated tables (e.g., RMT) be explicitly disallowed? - if (!res.second) - LOG_WARNING( - log, - "The local path '{}' is already mapped to a remote path '{}', ignoring: '{}'", - local_path, - res.first->second, - remote_path.parent_path().string()); - }); + /// This can happen if table replication is enabled, then the same local path is written + /// in `prefix.path` of each replica. + /// TODO: should replicated tables (e.g., RMT) be explicitly disallowed? 
+ if (!res.second) + LOG_WARNING( + log, + "The local path '{}' is already mapped to a remote path '{}', ignoring: '{}'", + local_path, + res.first->second, + remote_path.parent_path().string()); + }); } runner.waitForAllToFinishAndRethrowFirstError(); From 912bddf86f53f207b76ba453e43b6724b24ef6df Mon Sep 17 00:00:00 2001 From: Julia Kartseva Date: Sun, 14 Jul 2024 20:49:37 -0700 Subject: [PATCH 581/661] Update src/Disks/ObjectStorages/InMemoryPathMap.h Co-authored-by: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> --- src/Disks/ObjectStorages/InMemoryPathMap.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Disks/ObjectStorages/InMemoryPathMap.h b/src/Disks/ObjectStorages/InMemoryPathMap.h index dcd28dfaf6c..2ac291dfaf0 100644 --- a/src/Disks/ObjectStorages/InMemoryPathMap.h +++ b/src/Disks/ObjectStorages/InMemoryPathMap.h @@ -21,6 +21,7 @@ struct InMemoryPathMap return path1 < path2; } }; + /// Local -> Remote path. using Map = std::map; mutable SharedMutex mutex; Map map TSA_GUARDED_BY(mutex); From 727f5ed108e3b92c81d4ed295e0de438de8bae2b Mon Sep 17 00:00:00 2001 From: Julia Kartseva Date: Sun, 14 Jul 2024 20:49:57 -0700 Subject: [PATCH 582/661] Update src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.cpp Co-authored-by: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> --- .../MetadataStorageFromPlainRewritableObjectStorage.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.cpp b/src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.cpp index dba63bba321..3380dec60ca 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.cpp +++ b/src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.cpp @@ -29,7 +29,7 @@ constexpr auto PREFIX_PATH_FILE_NAME = "prefix.path"; constexpr auto METADATA_PATH_TOKEN = "__meta/"; /// Use a separate layout for metadata iff: -/// 1. The disk endpoint does not contain objects, OR +/// 1. The disk endpoint does not contain any objects yet (empty), OR /// 2. The metadata is already stored behind a separate endpoint. /// Otherwise, store metadata along with regular data for backward compatibility. std::string getMetadataKeyPrefix(ObjectStoragePtr object_storage) From 4c78531c9c0681a84309e02ecfde17a36f1c1ad5 Mon Sep 17 00:00:00 2001 From: Julia Kartseva Date: Sun, 14 Jul 2024 20:50:20 -0700 Subject: [PATCH 583/661] Update src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.cpp Co-authored-by: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> --- .../MetadataStorageFromPlainRewritableObjectStorage.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.cpp b/src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.cpp index 3380dec60ca..40aed32c047 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.cpp +++ b/src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.cpp @@ -28,7 +28,7 @@ namespace constexpr auto PREFIX_PATH_FILE_NAME = "prefix.path"; constexpr auto METADATA_PATH_TOKEN = "__meta/"; -/// Use a separate layout for metadata iff: +/// Use a separate layout for metadata if: /// 1. The disk endpoint does not contain any objects yet (empty), OR /// 2. The metadata is already stored behind a separate endpoint. 
/// Otherwise, store metadata along with regular data for backward compatibility. From 3b986ef3400021cf18797a7872b926dcc191b547 Mon Sep 17 00:00:00 2001 From: Julia Kartseva Date: Mon, 15 Jul 2024 05:50:42 +0000 Subject: [PATCH 584/661] address feedback: useSeparateLayoutForMetadata --- ...StorageFromPlainRewritableObjectStorage.cpp | 18 +++++++++++------- ...taStorageFromPlainRewritableObjectStorage.h | 4 +++- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.cpp b/src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.cpp index 40aed32c047..39b11d9a3e3 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.cpp +++ b/src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.cpp @@ -192,15 +192,15 @@ MetadataStorageFromPlainRewritableObjectStorage::MetadataStorageFromPlainRewrita "MetadataStorageFromPlainRewritableObjectStorage is not compatible with write-once storage '{}'", object_storage->getName()); - if (getMetadataKeyPrefix() == object_storage->getCommonKeyPrefix()) + if (useSeparateLayoutForMetadata()) { - auto keys_gen = std::make_shared(object_storage->getCommonKeyPrefix(), path_map); + /// Use flat directory structure if the metadata is stored separately from the table data. + auto keys_gen = std::make_shared(object_storage->getCommonKeyPrefix(), path_map); object_storage->setKeysGenerator(keys_gen); } else { - /// Use flat directory structure if the metadata is stored separately from the table data. - auto keys_gen = std::make_shared(object_storage->getCommonKeyPrefix(), path_map); + auto keys_gen = std::make_shared(object_storage->getCommonKeyPrefix(), path_map); object_storage->setKeysGenerator(keys_gen); } } @@ -216,7 +216,7 @@ bool MetadataStorageFromPlainRewritableObjectStorage::exists(const std::string & if (MetadataStorageFromPlainObjectStorage::exists(path)) return true; - if (getMetadataKeyPrefix() != object_storage->getCommonKeyPrefix()) + if (useSeparateLayoutForMetadata()) { auto key_prefix = object_storage->generateObjectKeyForPath(path, getMetadataKeyPrefix()).serialize(); return object_storage->existsOrHasAnyChild(key_prefix); @@ -227,7 +227,7 @@ bool MetadataStorageFromPlainRewritableObjectStorage::exists(const std::string & bool MetadataStorageFromPlainRewritableObjectStorage::isDirectory(const std::string & path) const { - if (getMetadataKeyPrefix() != object_storage->getCommonKeyPrefix()) + if (useSeparateLayoutForMetadata()) { auto directory = std::filesystem::path(object_storage->generateObjectKeyForPath(path, getMetadataKeyPrefix()).serialize()) / ""; return object_storage->existsOrHasAnyChild(directory); @@ -249,7 +249,7 @@ std::vector MetadataStorageFromPlainRewritableObjectStorage::listDi getDirectChildrenOnDisk(abs_key, files, std::filesystem::path(path) / "", directories); /// List empty directories that are identified by the `prefix.path` metadata files. This is required to, e.g., remove /// metadata along with regular files. 
- if (object_storage->getCommonKeyPrefix() != getMetadataKeyPrefix()) + if (useSeparateLayoutForMetadata()) { auto metadata_key = std::filesystem::path(getMetadataKeyPrefix()) / key_prefix / ""; RelativePathsWithMetadata metadata_files; @@ -269,4 +269,8 @@ void MetadataStorageFromPlainRewritableObjectStorage::getDirectChildrenOnDisk( getDirectChildrenOnDiskImpl(storage_key, remote_paths, local_path, *getPathMap(), result); } +bool MetadataStorageFromPlainRewritableObjectStorage::useSeparateLayoutForMetadata() const +{ + return getMetadataKeyPrefix() != object_storage->getCommonKeyPrefix(); +} } diff --git a/src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.h b/src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.h index 8fd147e15b9..82d93e3e7ae 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.h +++ b/src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.h @@ -24,7 +24,6 @@ public: bool isDirectory(const std::string & path) const override; std::vector listDirectory(const std::string & path) const override; - protected: std::string getMetadataKeyPrefix() const override { return metadata_key_prefix; } std::shared_ptr getPathMap() const override { return path_map; } @@ -33,6 +32,9 @@ protected: const RelativePathsWithMetadata & remote_paths, const std::string & local_path, std::unordered_set & result) const; + +private: + bool useSeparateLayoutForMetadata() const; }; } From 359b42738a25aa02436c1bebc49d0b751e456ccb Mon Sep 17 00:00:00 2001 From: Julia Kartseva Date: Mon, 15 Jul 2024 06:11:51 +0000 Subject: [PATCH 585/661] address feedback: key_prefix -> object_key_prefix --- ...tadataStorageFromPlainObjectStorageOperations.cpp | 12 ++++++------ ...MetadataStorageFromPlainObjectStorageOperations.h | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.cpp b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.cpp index 76090411bb9..31fb8c7ef97 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.cpp +++ b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.cpp @@ -22,9 +22,9 @@ namespace constexpr auto PREFIX_PATH_FILE_NAME = "prefix.path"; -ObjectStorageKey createMetadataObjectKey(const std::string & key_prefix, const std::string & metadata_key_prefix) +ObjectStorageKey createMetadataObjectKey(const std::string & object_key_prefix, const std::string & metadata_key_prefix) { - auto prefix = std::filesystem::path(metadata_key_prefix) / key_prefix; + auto prefix = std::filesystem::path(metadata_key_prefix) / object_key_prefix; return ObjectStorageKey::createAsRelative(prefix.string(), PREFIX_PATH_FILE_NAME); } } @@ -35,7 +35,7 @@ MetadataStorageFromPlainObjectStorageCreateDirectoryOperation::MetadataStorageFr , path_map(path_map_) , object_storage(object_storage_) , metadata_key_prefix(metadata_key_prefix_) - , key_prefix(object_storage->generateObjectKeyPrefixForDirectoryPath(path, "" /* key_prefix */).serialize()) + , object_key_prefix(object_storage->generateObjectKeyPrefixForDirectoryPath(path, "" /* object_key_prefix */).serialize()) { } @@ -47,7 +47,7 @@ void MetadataStorageFromPlainObjectStorageCreateDirectoryOperation::execute(std: return; } - auto metadata_object_key = createMetadataObjectKey(key_prefix, metadata_key_prefix); + auto metadata_object_key = createMetadataObjectKey(object_key_prefix, metadata_key_prefix); LOG_TRACE( 
getLogger("MetadataStorageFromPlainObjectStorageCreateDirectoryOperation"), @@ -68,7 +68,7 @@ void MetadataStorageFromPlainObjectStorageCreateDirectoryOperation::execute(std: { std::lock_guard lock(path_map.mutex); auto & map = path_map.map; - [[maybe_unused]] auto result = map.emplace(path.parent_path(), std::move(key_prefix)); + [[maybe_unused]] auto result = map.emplace(path.parent_path(), std::move(object_key_prefix)); chassert(result.second); } auto metric = object_storage->getMetadataStorageMetrics().directory_map_size; @@ -85,7 +85,7 @@ void MetadataStorageFromPlainObjectStorageCreateDirectoryOperation::execute(std: void MetadataStorageFromPlainObjectStorageCreateDirectoryOperation::undo(std::unique_lock &) { - auto metadata_object_key = createMetadataObjectKey(key_prefix, metadata_key_prefix); + auto metadata_object_key = createMetadataObjectKey(object_key_prefix, metadata_key_prefix); if (write_finalized) { diff --git a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.h b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.h index 3ac0ffef8d2..02305767faf 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.h +++ b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.h @@ -17,7 +17,7 @@ private: InMemoryPathMap & path_map; ObjectStoragePtr object_storage; const std::string metadata_key_prefix; - const std::string key_prefix; + const std::string object_key_prefix; bool write_created = false; bool write_finalized = false; From 79a8cbe0c595e877a750c26cc27a8c68202279c7 Mon Sep 17 00:00:00 2001 From: Julia Kartseva Date: Mon, 15 Jul 2024 06:16:00 +0000 Subject: [PATCH 586/661] address feedback: documentation --- .../MetadataStorageFromPlainObjectStorageOperations.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.cpp b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.cpp index 31fb8c7ef97..be5168c5385 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.cpp +++ b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.cpp @@ -55,7 +55,7 @@ void MetadataStorageFromPlainObjectStorageCreateDirectoryOperation::execute(std: path, metadata_object_key.serialize()); - auto metadata_object = StoredObject(metadata_object_key.serialize(), path / PREFIX_PATH_FILE_NAME); + auto metadata_object = StoredObject(/*remote_path*/ metadata_object_key.serialize(), /*local_path*/ path / PREFIX_PATH_FILE_NAME); auto buf = object_storage->writeObject( metadata_object, WriteMode::Rewrite, @@ -137,7 +137,8 @@ std::unique_ptr MetadataStorageFromPlainObjectStorageMo auto metadata_object_key = createMetadataObjectKey(remote_path, metadata_key_prefix); - auto metadata_object = StoredObject(metadata_object_key.serialize(), expected_path / PREFIX_PATH_FILE_NAME); + auto metadata_object + = StoredObject(/*remote_path*/ metadata_object_key.serialize(), /*local_path*/ expected_path / PREFIX_PATH_FILE_NAME); if (validate_content) { @@ -220,7 +221,7 @@ void MetadataStorageFromPlainObjectStorageRemoveDirectoryOperation::execute(std: LOG_TRACE(getLogger("MetadataStorageFromPlainObjectStorageRemoveDirectoryOperation"), "Removing directory '{}'", path); auto metadata_object_key = createMetadataObjectKey(key_prefix, metadata_key_prefix); - auto metadata_object = StoredObject(metadata_object_key.serialize(), path / PREFIX_PATH_FILE_NAME); + auto metadata_object 
= StoredObject(/*remote_path*/ metadata_object_key.serialize(), /*local_path*/ path / PREFIX_PATH_FILE_NAME); object_storage->removeObject(metadata_object); { From 42bd49dae6244590ba406ad502083bf610276eb9 Mon Sep 17 00:00:00 2001 From: Julia Kartseva Date: Mon, 15 Jul 2024 07:05:51 +0000 Subject: [PATCH 587/661] address feedback: parent_path() for directories --- ...torageFromPlainObjectStorageOperations.cpp | 30 ++++++++++++------- ...aStorageFromPlainObjectStorageOperations.h | 4 ++- 2 files changed, 23 insertions(+), 11 deletions(-) diff --git a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.cpp b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.cpp index be5168c5385..c0e3f8e1fc9 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.cpp +++ b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.cpp @@ -31,7 +31,7 @@ ObjectStorageKey createMetadataObjectKey(const std::string & object_key_prefix, MetadataStorageFromPlainObjectStorageCreateDirectoryOperation::MetadataStorageFromPlainObjectStorageCreateDirectoryOperation( std::filesystem::path && path_, InMemoryPathMap & path_map_, ObjectStoragePtr object_storage_, const std::string & metadata_key_prefix_) - : path(std::move(path_)) + : path((chassert(path_.string().ends_with('/')), std::move(path_))) , path_map(path_map_) , object_storage(object_storage_) , metadata_key_prefix(metadata_key_prefix_) @@ -41,9 +41,11 @@ MetadataStorageFromPlainObjectStorageCreateDirectoryOperation::MetadataStorageFr void MetadataStorageFromPlainObjectStorageCreateDirectoryOperation::execute(std::unique_lock &) { + /// parent_path() removes the trailing '/' + const auto base_path = path.parent_path(); { SharedLockGuard lock(path_map.mutex); - if (path_map.map.contains(path.parent_path())) + if (path_map.map.contains(base_path)) return; } @@ -68,7 +70,7 @@ void MetadataStorageFromPlainObjectStorageCreateDirectoryOperation::execute(std: { std::lock_guard lock(path_map.mutex); auto & map = path_map.map; - [[maybe_unused]] auto result = map.emplace(path.parent_path(), std::move(object_key_prefix)); + [[maybe_unused]] auto result = map.emplace(base_path, std::move(object_key_prefix)); chassert(result.second); } auto metric = object_storage->getMetadataStorageMetrics().directory_map_size; @@ -89,9 +91,10 @@ void MetadataStorageFromPlainObjectStorageCreateDirectoryOperation::undo(std::un if (write_finalized) { + const auto base_path = path.parent_path(); { std::lock_guard lock(path_map.mutex); - path_map.map.erase(path.parent_path()); + path_map.map.erase(base_path); } auto metric = object_storage->getMetadataStorageMetrics().directory_map_size; CurrentMetrics::sub(metric, 1); @@ -108,8 +111,8 @@ MetadataStorageFromPlainObjectStorageMoveDirectoryOperation::MetadataStorageFrom InMemoryPathMap & path_map_, ObjectStoragePtr object_storage_, const std::string & metadata_key_prefix_) - : path_from(std::move(path_from_)) - , path_to(std::move(path_to_)) + : path_from((chassert(path_from_.string().ends_with('/')), std::move(path_from_))) + , path_to((chassert(path_to_.string().ends_with('/')), std::move(path_to_))) , path_map(path_map_) , object_storage(object_storage_) , metadata_key_prefix(metadata_key_prefix_) @@ -123,6 +126,7 @@ std::unique_ptr MetadataStorageFromPlainObjectStorageMo { SharedLockGuard lock(path_map.mutex); auto & map = path_map.map; + /// parent_path() removes the trailing '/'. 
auto expected_it = map.find(expected_path.parent_path()); if (expected_it == map.end()) throw Exception( @@ -174,10 +178,14 @@ void MetadataStorageFromPlainObjectStorageMoveDirectoryOperation::execute(std::u writeString(path_to.string(), *write_buf); write_buf->finalize(); + /// parent_path() removes the trailing '/'. + auto base_path_to = path_to.parent_path(); + auto base_path_from = path_from.parent_path(); + { std::lock_guard lock(path_map.mutex); auto & map = path_map.map; - [[maybe_unused]] auto result = map.emplace(path_to.parent_path(), map.extract(path_from.parent_path()).mapped()); + [[maybe_unused]] auto result = map.emplace(base_path_to, map.extract(base_path_from).mapped()); chassert(result.second); } @@ -203,16 +211,18 @@ void MetadataStorageFromPlainObjectStorageMoveDirectoryOperation::undo(std::uniq MetadataStorageFromPlainObjectStorageRemoveDirectoryOperation::MetadataStorageFromPlainObjectStorageRemoveDirectoryOperation( std::filesystem::path && path_, InMemoryPathMap & path_map_, ObjectStoragePtr object_storage_, const std::string & metadata_key_prefix_) - : path(std::move(path_)), path_map(path_map_), object_storage(object_storage_), metadata_key_prefix(metadata_key_prefix_) + : path((chassert(path_.string().ends_with('/')), std::move(path_))), path_map(path_map_), object_storage(object_storage_), metadata_key_prefix(metadata_key_prefix_) { } void MetadataStorageFromPlainObjectStorageRemoveDirectoryOperation::execute(std::unique_lock & /* metadata_lock */) { + /// parent_path() removes the trailing '/' + const auto base_path = path.parent_path(); { SharedLockGuard lock(path_map.mutex); auto & map = path_map.map; - auto path_it = map.find(path.parent_path()); + auto path_it = map.find(base_path); if (path_it == map.end()) return; key_prefix = path_it->second; @@ -227,7 +237,7 @@ void MetadataStorageFromPlainObjectStorageRemoveDirectoryOperation::execute(std: { std::lock_guard lock(path_map.mutex); auto & map = path_map.map; - map.erase(path.parent_path()); + map.erase(base_path); } auto metric = object_storage->getMetadataStorageMetrics().directory_map_size; diff --git a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.h b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.h index 02305767faf..93ebe668d56 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.h +++ b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.h @@ -23,8 +23,8 @@ private: bool write_finalized = false; public: - // Assuming that paths are normalized. MetadataStorageFromPlainObjectStorageCreateDirectoryOperation( + /// path_ must end with a trailing '/'. std::filesystem::path && path_, InMemoryPathMap & path_map_, ObjectStoragePtr object_storage_, @@ -51,6 +51,7 @@ private: public: MetadataStorageFromPlainObjectStorageMoveDirectoryOperation( + /// Both path_from_ and path_to_ must end with a trailing '/'. std::filesystem::path && path_from_, std::filesystem::path && path_to_, InMemoryPathMap & path_map_, @@ -76,6 +77,7 @@ private: public: MetadataStorageFromPlainObjectStorageRemoveDirectoryOperation( + /// path_ must end with a trailing '/'. 
std::filesystem::path && path_, InMemoryPathMap & path_map_, ObjectStoragePtr object_storage_, From 41fc84bb2df441d117681c589dbea5c516ed4748 Mon Sep 17 00:00:00 2001 From: Julia Kartseva Date: Mon, 15 Jul 2024 21:16:27 +0000 Subject: [PATCH 588/661] fix build --- src/Disks/ObjectStorages/InMemoryPathMap.h | 1 + ...taStorageFromPlainObjectStorageOperations.cpp | 16 ++++++++++------ 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/src/Disks/ObjectStorages/InMemoryPathMap.h b/src/Disks/ObjectStorages/InMemoryPathMap.h index 2ac291dfaf0..e319c187ca7 100644 --- a/src/Disks/ObjectStorages/InMemoryPathMap.h +++ b/src/Disks/ObjectStorages/InMemoryPathMap.h @@ -2,6 +2,7 @@ #include #include +#include #include namespace DB diff --git a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.cpp b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.cpp index c0e3f8e1fc9..bfd203ef2e0 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.cpp +++ b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.cpp @@ -1,10 +1,10 @@ #include "MetadataStorageFromPlainObjectStorageOperations.h" #include -#include "Common/SharedLockGuard.h" #include #include #include +#include #include namespace DB @@ -31,12 +31,13 @@ ObjectStorageKey createMetadataObjectKey(const std::string & object_key_prefix, MetadataStorageFromPlainObjectStorageCreateDirectoryOperation::MetadataStorageFromPlainObjectStorageCreateDirectoryOperation( std::filesystem::path && path_, InMemoryPathMap & path_map_, ObjectStoragePtr object_storage_, const std::string & metadata_key_prefix_) - : path((chassert(path_.string().ends_with('/')), std::move(path_))) + : path(std::move(path_)) , path_map(path_map_) , object_storage(object_storage_) , metadata_key_prefix(metadata_key_prefix_) , object_key_prefix(object_storage->generateObjectKeyPrefixForDirectoryPath(path, "" /* object_key_prefix */).serialize()) { + chassert(path.string().ends_with('/')); } void MetadataStorageFromPlainObjectStorageCreateDirectoryOperation::execute(std::unique_lock &) @@ -70,7 +71,7 @@ void MetadataStorageFromPlainObjectStorageCreateDirectoryOperation::execute(std: { std::lock_guard lock(path_map.mutex); auto & map = path_map.map; - [[maybe_unused]] auto result = map.emplace(base_path, std::move(object_key_prefix)); + [[maybe_unused]] auto result = map.emplace(base_path, object_key_prefix); chassert(result.second); } auto metric = object_storage->getMetadataStorageMetrics().directory_map_size; @@ -111,12 +112,14 @@ MetadataStorageFromPlainObjectStorageMoveDirectoryOperation::MetadataStorageFrom InMemoryPathMap & path_map_, ObjectStoragePtr object_storage_, const std::string & metadata_key_prefix_) - : path_from((chassert(path_from_.string().ends_with('/')), std::move(path_from_))) - , path_to((chassert(path_to_.string().ends_with('/')), std::move(path_to_))) + : path_from(std::move(path_from_)) + , path_to(std::move(path_to_)) , path_map(path_map_) , object_storage(object_storage_) , metadata_key_prefix(metadata_key_prefix_) { + chassert(path_from.string().ends_with('/')); + chassert(path_to.string().ends_with('/')); } std::unique_ptr MetadataStorageFromPlainObjectStorageMoveDirectoryOperation::createWriteBuf( @@ -211,8 +214,9 @@ void MetadataStorageFromPlainObjectStorageMoveDirectoryOperation::undo(std::uniq MetadataStorageFromPlainObjectStorageRemoveDirectoryOperation::MetadataStorageFromPlainObjectStorageRemoveDirectoryOperation( std::filesystem::path && path_, 
InMemoryPathMap & path_map_, ObjectStoragePtr object_storage_, const std::string & metadata_key_prefix_) - : path((chassert(path_.string().ends_with('/')), std::move(path_))), path_map(path_map_), object_storage(object_storage_), metadata_key_prefix(metadata_key_prefix_) + : path(std::move(path_)), path_map(path_map_), object_storage(object_storage_), metadata_key_prefix(metadata_key_prefix_) { + chassert(path.string().ends_with('/')); } void MetadataStorageFromPlainObjectStorageRemoveDirectoryOperation::execute(std::unique_lock & /* metadata_lock */) From 9b4e02e8dabb649076389ee96a271da025913ddf Mon Sep 17 00:00:00 2001 From: Julia Kartseva Date: Tue, 16 Jul 2024 22:48:37 +0000 Subject: [PATCH 589/661] fix macOs build --- src/Disks/ObjectStorages/InMemoryPathMap.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/Disks/ObjectStorages/InMemoryPathMap.h b/src/Disks/ObjectStorages/InMemoryPathMap.h index e319c187ca7..a9859d5e2b8 100644 --- a/src/Disks/ObjectStorages/InMemoryPathMap.h +++ b/src/Disks/ObjectStorages/InMemoryPathMap.h @@ -25,7 +25,13 @@ struct InMemoryPathMap /// Local -> Remote path. using Map = std::map; mutable SharedMutex mutex; - Map map TSA_GUARDED_BY(mutex); + +#ifdef OS_LINUX + Map TSA_GUARDED_BY(mutex) map; +/// std::shared_mutex may not be annotated with the 'capability' attribute in libcxx. +#else + Map map; +#endif }; } From 774cba09dfd4ab347d05caf45f8135a3a51771c3 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Fri, 2 Aug 2024 08:48:41 +0200 Subject: [PATCH 590/661] Fix flaky test_replicated_table_attach --- .../test_replicated_table_attach/configs/config.xml | 2 +- tests/integration/test_replicated_table_attach/test.py | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/tests/integration/test_replicated_table_attach/configs/config.xml b/tests/integration/test_replicated_table_attach/configs/config.xml index fea3eab4126..3f72f638776 100644 --- a/tests/integration/test_replicated_table_attach/configs/config.xml +++ b/tests/integration/test_replicated_table_attach/configs/config.xml @@ -1,6 +1,6 @@ 1 - 5 + 3 diff --git a/tests/integration/test_replicated_table_attach/test.py b/tests/integration/test_replicated_table_attach/test.py index de60b7ec291..4fe8064b26a 100644 --- a/tests/integration/test_replicated_table_attach/test.py +++ b/tests/integration/test_replicated_table_attach/test.py @@ -80,4 +80,8 @@ def test_startup_with_small_bg_pool_partitioned(started_cluster): assert_values() # check that we activate it in the end - node.query_with_retry("INSERT INTO replicated_table_partitioned VALUES(20, 30)") + node.query_with_retry( + "INSERT INTO replicated_table_partitioned VALUES(20, 30)", + retry_count=20, + sleep_time=3, + ) From 797144270b3e20e9e4306949bde95c9a9a32c5e0 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Fri, 2 Aug 2024 07:09:39 +0000 Subject: [PATCH 591/661] Update version_date.tsv and changelogs after v24.4.4.113-stable --- docs/changelogs/v24.4.4.113-stable.md | 73 +++++++++++++++++++++++++++ utils/list-versions/version_date.tsv | 1 + 2 files changed, 74 insertions(+) create mode 100644 docs/changelogs/v24.4.4.113-stable.md diff --git a/docs/changelogs/v24.4.4.113-stable.md b/docs/changelogs/v24.4.4.113-stable.md new file mode 100644 index 00000000000..1f8a221a0a2 --- /dev/null +++ b/docs/changelogs/v24.4.4.113-stable.md @@ -0,0 +1,73 @@ +--- +sidebar_position: 1 +sidebar_label: 2024 +--- + +# 2024 Changelog + +### ClickHouse release v24.4.4.113-stable (d63a54957bd) FIXME as compared to 
v24.4.3.25-stable (a915dd4eda4)

#### Improvement
* Backported in [#65884](https://github.com/ClickHouse/ClickHouse/issues/65884): Always start Keeper with sufficient amount of threads in global thread pool. [#64444](https://github.com/ClickHouse/ClickHouse/pull/64444) ([Duc Canh Le](https://github.com/canhld94)).
* Backported in [#65303](https://github.com/ClickHouse/ClickHouse/issues/65303): Returned back the behaviour of how ClickHouse works and interprets Tuples in CSV format. This change effectively reverts https://github.com/ClickHouse/ClickHouse/pull/60994 and makes it available only under a few settings: `output_format_csv_serialize_tuple_into_separate_columns`, `input_format_csv_deserialize_separate_columns_into_tuple` and `input_format_csv_try_infer_strings_from_quoted_tuples`. [#65170](https://github.com/ClickHouse/ClickHouse/pull/65170) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
* Backported in [#65894](https://github.com/ClickHouse/ClickHouse/issues/65894): Respect cgroup CPU limit in Keeper. [#65819](https://github.com/ClickHouse/ClickHouse/pull/65819) ([Antonio Andelic](https://github.com/antonio2368)).

#### Critical Bug Fix (crash, LOGICAL_ERROR, data loss, RBAC)
* Backported in [#65372](https://github.com/ClickHouse/ClickHouse/issues/65372): Fix a bug in ClickHouse Keeper that causes digest mismatch during closing session. [#65198](https://github.com/ClickHouse/ClickHouse/pull/65198) ([Aleksei Filatov](https://github.com/aalexfvk)).
* Backported in [#66883](https://github.com/ClickHouse/ClickHouse/issues/66883): Fix unexpected size of low cardinality column in function calls. [#65298](https://github.com/ClickHouse/ClickHouse/pull/65298) ([Raúl Marín](https://github.com/Algunenano)).
* Backported in [#65435](https://github.com/ClickHouse/ClickHouse/issues/65435): Forbid `QUALIFY` clause in the old analyzer. The old analyzer ignored `QUALIFY`, so it could lead to unexpected data removal in mutations. [#65356](https://github.com/ClickHouse/ClickHouse/pull/65356) ([Dmitry Novik](https://github.com/novikd)).
* Backported in [#65448](https://github.com/ClickHouse/ClickHouse/issues/65448): Use correct memory alignment for Distinct combinator. Previously, crash could happen because of invalid memory allocation when the combinator was used. [#65379](https://github.com/ClickHouse/ClickHouse/pull/65379) ([Antonio Andelic](https://github.com/antonio2368)).
* Backported in [#65710](https://github.com/ClickHouse/ClickHouse/issues/65710): Fix crash in maxIntersections. [#65689](https://github.com/ClickHouse/ClickHouse/pull/65689) ([Raúl Marín](https://github.com/Algunenano)).
* Backported in [#66689](https://github.com/ClickHouse/ClickHouse/issues/66689): Fix the VALID UNTIL clause in the user definition resetting after a restart. Closes [#66405](https://github.com/ClickHouse/ClickHouse/issues/66405). [#66409](https://github.com/ClickHouse/ClickHouse/pull/66409) ([Nikolay Degterinsky](https://github.com/evillique)).
* Backported in [#67499](https://github.com/ClickHouse/ClickHouse/issues/67499): Fix crash in DistributedAsyncInsert when connection is empty. [#67219](https://github.com/ClickHouse/ClickHouse/pull/67219) ([Pablo Marcos](https://github.com/pamarcos)).

#### Bug Fix (user-visible misbehavior in an official stable release)
* Backported in [#65353](https://github.com/ClickHouse/ClickHouse/issues/65353): Fix possible abort on uncaught exception in ~WriteBufferFromFileDescriptor in StatusFile.
[#64206](https://github.com/ClickHouse/ClickHouse/pull/64206) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#65060](https://github.com/ClickHouse/ClickHouse/issues/65060): Fix the `Expression nodes list expected 1 projection names` and `Unknown expression or identifier` errors for queries with aliases to `GLOBAL IN.`. [#64517](https://github.com/ClickHouse/ClickHouse/pull/64517) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#65329](https://github.com/ClickHouse/ClickHouse/issues/65329): Fix the crash loop when restoring from backup is blocked by creating an MV with a definer that hasn't been restored yet. [#64595](https://github.com/ClickHouse/ClickHouse/pull/64595) ([pufit](https://github.com/pufit)). +* Backported in [#64833](https://github.com/ClickHouse/ClickHouse/issues/64833): Fix bug which could lead to non-working TTLs with expressions. [#64694](https://github.com/ClickHouse/ClickHouse/pull/64694) ([alesapin](https://github.com/alesapin)). +* Backported in [#65086](https://github.com/ClickHouse/ClickHouse/issues/65086): Fix removing the `WHERE` and `PREWHERE` expressions, which are always true (for the new analyzer). [#64695](https://github.com/ClickHouse/ClickHouse/pull/64695) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#65540](https://github.com/ClickHouse/ClickHouse/issues/65540): Fix crash for `ALTER TABLE ... ON CLUSTER ... MODIFY SQL SECURITY`. [#64957](https://github.com/ClickHouse/ClickHouse/pull/64957) ([pufit](https://github.com/pufit)). +* Backported in [#65578](https://github.com/ClickHouse/ClickHouse/issues/65578): Fix crash on destroying AccessControl: add explicit shutdown. [#64993](https://github.com/ClickHouse/ClickHouse/pull/64993) ([Vitaly Baranov](https://github.com/vitlibar)). +* Backported in [#65161](https://github.com/ClickHouse/ClickHouse/issues/65161): Fix pushing arithmetic operations out of aggregation. In the new analyzer, optimization was applied only once. [#65104](https://github.com/ClickHouse/ClickHouse/pull/65104) ([Dmitry Novik](https://github.com/novikd)). +* Backported in [#65616](https://github.com/ClickHouse/ClickHouse/issues/65616): Fix aggregate function name rewriting in the new analyzer. [#65110](https://github.com/ClickHouse/ClickHouse/pull/65110) ([Dmitry Novik](https://github.com/novikd)). +* Backported in [#65730](https://github.com/ClickHouse/ClickHouse/issues/65730): Eliminate injective function in argument of functions `uniq*` recursively. This used to work correctly but was broken in the new analyzer. [#65140](https://github.com/ClickHouse/ClickHouse/pull/65140) ([Duc Canh Le](https://github.com/canhld94)). +* Backported in [#65668](https://github.com/ClickHouse/ClickHouse/issues/65668): Disable `non-intersecting-parts` optimization for queries with `FINAL` in case of `read-in-order` optimization was enabled. This could lead to an incorrect query result. As a workaround, disable `do_not_merge_across_partitions_select_final` and `split_parts_ranges_into_intersecting_and_non_intersecting_final` before this fix is merged. [#65505](https://github.com/ClickHouse/ClickHouse/pull/65505) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#65786](https://github.com/ClickHouse/ClickHouse/issues/65786): Fixed bug in MergeJoin. Column in sparse serialisation might be treated as a column of its nested type though the required conversion wasn't performed. 
[#65632](https://github.com/ClickHouse/ClickHouse/pull/65632) ([Nikita Taranov](https://github.com/nickitat)). +* Backported in [#65810](https://github.com/ClickHouse/ClickHouse/issues/65810): Fix invalid exceptions in function `parseDateTime` with `%F` and `%D` placeholders. [#65768](https://github.com/ClickHouse/ClickHouse/pull/65768) ([Antonio Andelic](https://github.com/antonio2368)). +* Backported in [#65931](https://github.com/ClickHouse/ClickHouse/issues/65931): For queries that read from `PostgreSQL`, cancel the internal `PostgreSQL` query if the ClickHouse query is finished. Otherwise, `ClickHouse` query cannot be canceled until the internal `PostgreSQL` query is finished. [#65771](https://github.com/ClickHouse/ClickHouse/pull/65771) ([Maksim Kita](https://github.com/kitaisreal)). +* Backported in [#65826](https://github.com/ClickHouse/ClickHouse/issues/65826): Fix a bug in short circuit logic when old analyzer and dictGetOrDefault is used. [#65802](https://github.com/ClickHouse/ClickHouse/pull/65802) ([jsc0218](https://github.com/jsc0218)). +* Backported in [#66299](https://github.com/ClickHouse/ClickHouse/issues/66299): Better handling of join conditions involving `IS NULL` checks (for example `ON (a = b AND (a IS NOT NULL) AND (b IS NOT NULL) ) OR ( (a IS NULL) AND (b IS NULL) )` is rewritten to `ON a <=> b`), fix incorrect optimization when condition other then `IS NULL` are present. [#65835](https://github.com/ClickHouse/ClickHouse/pull/65835) ([vdimir](https://github.com/vdimir)). +* Backported in [#66326](https://github.com/ClickHouse/ClickHouse/issues/66326): Add missing settings `input_format_csv_skip_first_lines/input_format_tsv_skip_first_lines/input_format_csv_try_infer_numbers_from_strings/input_format_csv_try_infer_strings_from_quoted_tuples` in schema inference cache because they can change the resulting schema. It prevents from incorrect result of schema inference with these settings changed. [#65980](https://github.com/ClickHouse/ClickHouse/pull/65980) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#66153](https://github.com/ClickHouse/ClickHouse/issues/66153): Fixed buffer overflow bug in `unbin`/`unhex` implementation. [#66106](https://github.com/ClickHouse/ClickHouse/pull/66106) ([Nikita Taranov](https://github.com/nickitat)). +* Backported in [#66459](https://github.com/ClickHouse/ClickHouse/issues/66459): Fixed a bug in ZooKeeper client: a session could get stuck in unusable state after receiving a hardware error from ZooKeeper. For example, this might happen due to "soft memory limit" in ClickHouse Keeper. [#66140](https://github.com/ClickHouse/ClickHouse/pull/66140) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#66224](https://github.com/ClickHouse/ClickHouse/issues/66224): Fix issue in SumIfToCountIfVisitor and signed integers. [#66146](https://github.com/ClickHouse/ClickHouse/pull/66146) ([Raúl Marín](https://github.com/Algunenano)). +* Backported in [#66267](https://github.com/ClickHouse/ClickHouse/issues/66267): Don't throw `TIMEOUT_EXCEEDED` for `none_only_active` mode of `distributed_ddl_output_mode`. [#66218](https://github.com/ClickHouse/ClickHouse/pull/66218) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#66678](https://github.com/ClickHouse/ClickHouse/issues/66678): Fix handling limit for `system.numbers_mt` when no index can be used. [#66231](https://github.com/ClickHouse/ClickHouse/pull/66231) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). 
+* Backported in [#66603](https://github.com/ClickHouse/ClickHouse/issues/66603): Fixed how the ClickHouse server detects the maximum number of usable CPU cores as specified by cgroups v2 if the server runs in a container such as Docker. In more detail, containers often run their process in the root cgroup which has an empty name. In that case, ClickHouse ignored the CPU limits set by cgroups v2. [#66237](https://github.com/ClickHouse/ClickHouse/pull/66237) ([filimonov](https://github.com/filimonov)). +* Backported in [#66358](https://github.com/ClickHouse/ClickHouse/issues/66358): Fix the `Not-ready set` error when a subquery with `IN` is used in the constraint. [#66261](https://github.com/ClickHouse/ClickHouse/pull/66261) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#66971](https://github.com/ClickHouse/ClickHouse/issues/66971): Fix `Column identifier is already registered` error with `group_by_use_nulls=true` and new analyzer. [#66400](https://github.com/ClickHouse/ClickHouse/pull/66400) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#66968](https://github.com/ClickHouse/ClickHouse/issues/66968): Fix `Cannot find column` error for queries with constant expression in `GROUP BY` key and new analyzer enabled. [#66433](https://github.com/ClickHouse/ClickHouse/pull/66433) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#66719](https://github.com/ClickHouse/ClickHouse/issues/66719): Correctly track memory for `Allocator::realloc`. [#66548](https://github.com/ClickHouse/ClickHouse/pull/66548) ([Antonio Andelic](https://github.com/antonio2368)). +* Backported in [#66950](https://github.com/ClickHouse/ClickHouse/issues/66950): Fix an invalid result for queries with `WINDOW`. This could happen when `PARTITION` columns have sparse serialization and window functions are executed in parallel. [#66579](https://github.com/ClickHouse/ClickHouse/pull/66579) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#66947](https://github.com/ClickHouse/ClickHouse/issues/66947): Fix `Method getResultType is not supported for QUERY query node` error when scalar subquery was used as the first argument of IN (with new analyzer). [#66655](https://github.com/ClickHouse/ClickHouse/pull/66655) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#67631](https://github.com/ClickHouse/ClickHouse/issues/67631): Fix for occasional deadlock in Context::getDDLWorker. [#66843](https://github.com/ClickHouse/ClickHouse/pull/66843) ([Alexander Gololobov](https://github.com/davenger)). +* Backported in [#67195](https://github.com/ClickHouse/ClickHouse/issues/67195): TRUNCATE DATABASE used to stop replication as if it was a DROP DATABASE query, it's fixed. [#67129](https://github.com/ClickHouse/ClickHouse/pull/67129) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#67377](https://github.com/ClickHouse/ClickHouse/issues/67377): Fix error `Cannot convert column because it is non constant in source stream but must be constant in result.` for a query that reads from the `Merge` table over the `Distriburted` table with one shard. [#67146](https://github.com/ClickHouse/ClickHouse/pull/67146) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#67240](https://github.com/ClickHouse/ClickHouse/issues/67240): This closes [#67156](https://github.com/ClickHouse/ClickHouse/issues/67156). This closes [#66447](https://github.com/ClickHouse/ClickHouse/issues/66447). 
The bug was introduced in https://github.com/ClickHouse/ClickHouse/pull/62907. [#67178](https://github.com/ClickHouse/ClickHouse/pull/67178) ([Maksim Kita](https://github.com/kitaisreal)). +* Backported in [#67574](https://github.com/ClickHouse/ClickHouse/issues/67574): Fix execution of nested short-circuit functions. [#67520](https://github.com/ClickHouse/ClickHouse/pull/67520) ([Kruglov Pavel](https://github.com/Avogar)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Backported in [#65410](https://github.com/ClickHouse/ClickHouse/issues/65410): Re-enable OpenSSL session caching. [#65111](https://github.com/ClickHouse/ClickHouse/pull/65111) ([Robert Schulze](https://github.com/rschu1ze)). +* Backported in [#65903](https://github.com/ClickHouse/ClickHouse/issues/65903): Fix bug with session closing in Keeper. [#65735](https://github.com/ClickHouse/ClickHouse/pull/65735) ([Antonio Andelic](https://github.com/antonio2368)). +* Backported in [#66385](https://github.com/ClickHouse/ClickHouse/issues/66385): Disable broken cases from 02911_join_on_nullsafe_optimization. [#66310](https://github.com/ClickHouse/ClickHouse/pull/66310) ([vdimir](https://github.com/vdimir)). +* Backported in [#66424](https://github.com/ClickHouse/ClickHouse/issues/66424): Ignore subquery for IN in DDLLoadingDependencyVisitor. [#66395](https://github.com/ClickHouse/ClickHouse/pull/66395) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#66542](https://github.com/ClickHouse/ClickHouse/issues/66542): Add additional log masking in CI. [#66523](https://github.com/ClickHouse/ClickHouse/pull/66523) ([Raúl Marín](https://github.com/Algunenano)). +* Backported in [#66857](https://github.com/ClickHouse/ClickHouse/issues/66857): Fix data race in S3::ClientCache. [#66644](https://github.com/ClickHouse/ClickHouse/pull/66644) ([Konstantin Morozov](https://github.com/k-morozov)). +* Backported in [#66873](https://github.com/ClickHouse/ClickHouse/issues/66873): Support one more case in JOIN ON ... IS NULL. [#66725](https://github.com/ClickHouse/ClickHouse/pull/66725) ([vdimir](https://github.com/vdimir)). +* Backported in [#67057](https://github.com/ClickHouse/ClickHouse/issues/67057): Increase asio pool size in case the server is tiny. [#66761](https://github.com/ClickHouse/ClickHouse/pull/66761) ([alesapin](https://github.com/alesapin)). +* Backported in [#66944](https://github.com/ClickHouse/ClickHouse/issues/66944): Small fix in realloc memory tracking. [#66820](https://github.com/ClickHouse/ClickHouse/pull/66820) ([Antonio Andelic](https://github.com/antonio2368)). +* Backported in [#67250](https://github.com/ClickHouse/ClickHouse/issues/67250): Followup [#66725](https://github.com/ClickHouse/ClickHouse/issues/66725). [#66869](https://github.com/ClickHouse/ClickHouse/pull/66869) ([vdimir](https://github.com/vdimir)). +* Backported in [#67410](https://github.com/ClickHouse/ClickHouse/issues/67410): CI: Fix build results for release branches. [#67402](https://github.com/ClickHouse/ClickHouse/pull/67402) ([Max K.](https://github.com/maxknv)). 
+ diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv index b1391c2d781..7b5dcda82e3 100644 --- a/utils/list-versions/version_date.tsv +++ b/utils/list-versions/version_date.tsv @@ -6,6 +6,7 @@ v24.5.4.49-stable 2024-07-01 v24.5.3.5-stable 2024-06-13 v24.5.2.34-stable 2024-06-13 v24.5.1.1763-stable 2024-06-01 +v24.4.4.113-stable 2024-08-02 v24.4.3.25-stable 2024-06-14 v24.4.2.141-stable 2024-06-07 v24.4.1.2088-stable 2024-05-01 From dc65c0aa078cf06357291c0fe68f6c035698320f Mon Sep 17 00:00:00 2001 From: Pablo Marcos Date: Fri, 2 Aug 2024 07:15:40 +0000 Subject: [PATCH 592/661] Fix doc for parallel test execution Copy-pasterino strikes again. I forgot to remove the single quote. With it, pytest thinks the whole argument is a file: (no name '/ClickHouse/tests/integration/test_storage_s3_queue/test.py::test_max_set_age -- --count 10 -n 5' in any of []) --- tests/integration/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/README.md b/tests/integration/README.md index a8deb97b526..85146c79b1e 100644 --- a/tests/integration/README.md +++ b/tests/integration/README.md @@ -142,7 +142,7 @@ of parallel workers for `pytest-xdist`. $ export CLICKHOUSE_TESTS_BASE_CONFIG_DIR=$HOME/ClickHouse/programs/server/ $ export CLICKHOUSE_TESTS_SERVER_BIN_PATH=$HOME/ClickHouse/programs/clickhouse $ export CLICKHOUSE_TESTS_ODBC_BRIDGE_BIN_PATH=$HOME/ClickHouse/programs/clickhouse-odbc-bridge -$ ./runner 'test_storage_s3_queue/test.py::test_max_set_age -- --count 10 -n 5' +$ ./runner test_storage_s3_queue/test.py::test_max_set_age --count 10 -n 5 Start tests =============================================================================== test session starts ================================================================================ platform linux -- Python 3.10.12, pytest-7.4.4, pluggy-1.5.0 -- /usr/bin/python3 From 9c05a0ad5a0269af02ae2234e1d01dc3ce64bce2 Mon Sep 17 00:00:00 2001 From: Michael Stetsyuk Date: Fri, 2 Aug 2024 09:34:32 +0100 Subject: [PATCH 593/661] rm dirs in test_storage_delta --- tests/integration/test_storage_delta/test.py | 28 +++++++------------- 1 file changed, 10 insertions(+), 18 deletions(-) diff --git a/tests/integration/test_storage_delta/test.py b/tests/integration/test_storage_delta/test.py index 384b8296f66..92a870ab360 100644 --- a/tests/integration/test_storage_delta/test.py +++ b/tests/integration/test_storage_delta/test.py @@ -8,6 +8,7 @@ import os import json import time import glob +import shutil import pyspark import delta @@ -52,15 +53,6 @@ def get_spark(): return builder.master("local").getOrCreate() -def remove_local_directory_contents(full_path): - for path in glob.glob(f"{full_path}/**"): - if os.path.isfile(path): - os.unlink(path) - else: - remove_local_directory_contents(path) - os.rmdir(path) - - @pytest.fixture(scope="module") def started_cluster(): try: @@ -179,7 +171,7 @@ def test_single_log_file(started_cluster): ) os.unlink(parquet_data_path) - remove_local_directory_contents(f"/{TABLE_NAME}") + shutil.rmtree(f"/{TABLE_NAME}") def test_partition_by(started_cluster): @@ -203,7 +195,7 @@ def test_partition_by(started_cluster): create_delta_table(instance, TABLE_NAME) assert int(instance.query(f"SELECT count() FROM {TABLE_NAME}")) == 10 - remove_local_directory_contents(f"/{TABLE_NAME}") + shutil.rmtree(f"/{TABLE_NAME}") def test_checkpoint(started_cluster): @@ -280,7 +272,7 @@ def test_checkpoint(started_cluster): ).strip() ) - remove_local_directory_contents(f"/{TABLE_NAME}") + 
shutil.rmtree(f"/{TABLE_NAME}") spark.sql(f"DROP TABLE {TABLE_NAME}") @@ -321,7 +313,7 @@ def test_multiple_log_files(started_cluster): "SELECT number, toString(number + 1) FROM numbers(200)" ) - remove_local_directory_contents(f"/{TABLE_NAME}") + shutil.rmtree(f"/{TABLE_NAME}") def test_metadata(started_cluster): @@ -357,7 +349,7 @@ def test_metadata(started_cluster): assert int(instance.query(f"SELECT count() FROM {TABLE_NAME}")) == 100 os.unlink(parquet_data_path) - remove_local_directory_contents(f"/{TABLE_NAME}") + shutil.rmtree(f"/{TABLE_NAME}") def test_types(started_cluster): @@ -431,7 +423,7 @@ def test_types(started_cluster): ] ) - remove_local_directory_contents(f"/{result_file}") + shutil.rmtree(f"/{result_file}") spark.sql(f"DROP TABLE {TABLE_NAME}") @@ -496,7 +488,7 @@ def test_restart_broken(started_cluster): assert int(instance.query(f"SELECT count() FROM {TABLE_NAME}")) == 100 os.unlink(parquet_data_path) - remove_local_directory_contents(f"/{TABLE_NAME}") + shutil.rmtree(f"/{TABLE_NAME}") def test_restart_broken_table_function(started_cluster): @@ -553,7 +545,7 @@ def test_restart_broken_table_function(started_cluster): assert int(instance.query(f"SELECT count() FROM {TABLE_NAME}")) == 100 os.unlink(parquet_data_path) - remove_local_directory_contents(f"/{TABLE_NAME}") + shutil.rmtree(f"/{TABLE_NAME}") def test_partition_columns(started_cluster): @@ -753,5 +745,5 @@ SELECT * FROM deltaLake('http://{started_cluster.minio_ip}:{started_cluster.mini == 1 ) - remove_local_directory_contents(f"/{TABLE_NAME}") + shutil.rmtree(f"/{TABLE_NAME}") spark.sql(f"DROP TABLE {TABLE_NAME}") From 01ca36cb5a157ab961dbd4460acc7e2ebb37e72a Mon Sep 17 00:00:00 2001 From: Michael Stetsyuk Date: Fri, 2 Aug 2024 09:37:47 +0100 Subject: [PATCH 594/661] empty From 6c8f458b0bf9981068c7fecfdd9cef627406419b Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Fri, 2 Aug 2024 11:13:41 +0200 Subject: [PATCH 595/661] Fix reloading SQL UDFs with UNION --- .../UserDefinedSQLFunctionFactory.cpp | 8 +++++-- .../UserDefinedSQLObjectsDiskStorage.cpp | 6 ++--- .../UserDefinedSQLObjectsDiskStorage.h | 1 - .../UserDefinedSQLObjectsStorageBase.cpp | 15 +++++++++--- .../UserDefinedSQLObjectsStorageBase.h | 4 ++++ .../UserDefinedSQLObjectsZooKeeperStorage.cpp | 2 +- .../UserDefinedSQLObjectsZooKeeperStorage.h | 2 -- .../NormalizeSelectWithUnionQueryVisitor.h | 2 -- .../test.py | 23 +++++++++++++++++-- .../test.py | 12 ++++++++++ .../03215_udf_with_union.reference | 1 + .../0_stateless/03215_udf_with_union.sql | 14 +++++++++++ 12 files changed, 74 insertions(+), 16 deletions(-) create mode 100644 tests/queries/0_stateless/03215_udf_with_union.reference create mode 100644 tests/queries/0_stateless/03215_udf_with_union.sql diff --git a/src/Functions/UserDefined/UserDefinedSQLFunctionFactory.cpp b/src/Functions/UserDefined/UserDefinedSQLFunctionFactory.cpp index e6796874e50..d0bc812f91d 100644 --- a/src/Functions/UserDefined/UserDefinedSQLFunctionFactory.cpp +++ b/src/Functions/UserDefined/UserDefinedSQLFunctionFactory.cpp @@ -2,6 +2,7 @@ #include #include +#include #include #include #include @@ -9,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -80,13 +82,15 @@ namespace validateFunctionRecursiveness(*function_body, name); } - ASTPtr normalizeCreateFunctionQuery(const IAST & create_function_query) + ASTPtr normalizeCreateFunctionQuery(const IAST & create_function_query, const ContextPtr & context) { auto ptr = create_function_query.clone(); auto & res = typeid_cast(*ptr); res.if_not_exists 
= false; res.or_replace = false; FunctionNameNormalizer::visit(res.function_core.get()); + NormalizeSelectWithUnionQueryVisitor::Data data{context->getSettingsRef().union_default_mode}; + NormalizeSelectWithUnionQueryVisitor{data}.visit(res.function_core); return ptr; } } @@ -125,7 +129,7 @@ void UserDefinedSQLFunctionFactory::checkCanBeUnregistered(const ContextPtr & co bool UserDefinedSQLFunctionFactory::registerFunction(const ContextMutablePtr & context, const String & function_name, ASTPtr create_function_query, bool throw_if_exists, bool replace_if_exists) { checkCanBeRegistered(context, function_name, *create_function_query); - create_function_query = normalizeCreateFunctionQuery(*create_function_query); + create_function_query = normalizeCreateFunctionQuery(*create_function_query, context); try { diff --git a/src/Functions/UserDefined/UserDefinedSQLObjectsDiskStorage.cpp b/src/Functions/UserDefined/UserDefinedSQLObjectsDiskStorage.cpp index 4c004d2537c..8910b45e79d 100644 --- a/src/Functions/UserDefined/UserDefinedSQLObjectsDiskStorage.cpp +++ b/src/Functions/UserDefined/UserDefinedSQLObjectsDiskStorage.cpp @@ -1,7 +1,7 @@ #include "Functions/UserDefined/UserDefinedSQLObjectsDiskStorage.h" -#include "Functions/UserDefined/UserDefinedSQLFunctionFactory.h" -#include "Functions/UserDefined/UserDefinedSQLObjectType.h" +#include +#include #include #include @@ -54,7 +54,7 @@ namespace } UserDefinedSQLObjectsDiskStorage::UserDefinedSQLObjectsDiskStorage(const ContextPtr & global_context_, const String & dir_path_) - : global_context(global_context_) + : UserDefinedSQLObjectsStorageBase(global_context_) , dir_path{makeDirectoryPathCanonical(dir_path_)} , log{getLogger("UserDefinedSQLObjectsLoaderFromDisk")} { diff --git a/src/Functions/UserDefined/UserDefinedSQLObjectsDiskStorage.h b/src/Functions/UserDefined/UserDefinedSQLObjectsDiskStorage.h index ae0cbd0c589..cafbd140598 100644 --- a/src/Functions/UserDefined/UserDefinedSQLObjectsDiskStorage.h +++ b/src/Functions/UserDefined/UserDefinedSQLObjectsDiskStorage.h @@ -42,7 +42,6 @@ private: ASTPtr tryLoadObject(UserDefinedSQLObjectType object_type, const String & object_name, const String & file_path, bool check_file_exists); String getFilePath(UserDefinedSQLObjectType object_type, const String & object_name) const; - ContextPtr global_context; String dir_path; LoggerPtr log; std::atomic objects_loaded = false; diff --git a/src/Functions/UserDefined/UserDefinedSQLObjectsStorageBase.cpp b/src/Functions/UserDefined/UserDefinedSQLObjectsStorageBase.cpp index f251d11789f..225e919301d 100644 --- a/src/Functions/UserDefined/UserDefinedSQLObjectsStorageBase.cpp +++ b/src/Functions/UserDefined/UserDefinedSQLObjectsStorageBase.cpp @@ -2,7 +2,10 @@ #include +#include +#include #include +#include #include namespace DB @@ -17,18 +20,24 @@ namespace ErrorCodes namespace { -ASTPtr normalizeCreateFunctionQuery(const IAST & create_function_query) +ASTPtr normalizeCreateFunctionQuery(const IAST & create_function_query, const ContextPtr & context) { auto ptr = create_function_query.clone(); auto & res = typeid_cast(*ptr); res.if_not_exists = false; res.or_replace = false; FunctionNameNormalizer::visit(res.function_core.get()); + NormalizeSelectWithUnionQueryVisitor::Data data{context->getSettingsRef().union_default_mode}; + NormalizeSelectWithUnionQueryVisitor{data}.visit(res.function_core); return ptr; } } +UserDefinedSQLObjectsStorageBase::UserDefinedSQLObjectsStorageBase(ContextPtr global_context_) + : global_context(std::move(global_context_)) +{} + ASTPtr 
UserDefinedSQLObjectsStorageBase::get(const String & object_name) const { std::lock_guard lock(mutex); @@ -148,7 +157,7 @@ void UserDefinedSQLObjectsStorageBase::setAllObjects(const std::vector normalized_functions; for (const auto & [function_name, create_query] : new_objects) - normalized_functions[function_name] = normalizeCreateFunctionQuery(*create_query); + normalized_functions[function_name] = normalizeCreateFunctionQuery(*create_query, global_context); std::lock_guard lock(mutex); object_name_to_create_object_map = std::move(normalized_functions); @@ -166,7 +175,7 @@ std::vector> UserDefinedSQLObjectsStorageBase::getAllO void UserDefinedSQLObjectsStorageBase::setObject(const String & object_name, const IAST & create_object_query) { std::lock_guard lock(mutex); - object_name_to_create_object_map[object_name] = normalizeCreateFunctionQuery(create_object_query); + object_name_to_create_object_map[object_name] = normalizeCreateFunctionQuery(create_object_query, global_context); } void UserDefinedSQLObjectsStorageBase::removeObject(const String & object_name) diff --git a/src/Functions/UserDefined/UserDefinedSQLObjectsStorageBase.h b/src/Functions/UserDefined/UserDefinedSQLObjectsStorageBase.h index cab63a3bfcf..0dbc5586f08 100644 --- a/src/Functions/UserDefined/UserDefinedSQLObjectsStorageBase.h +++ b/src/Functions/UserDefined/UserDefinedSQLObjectsStorageBase.h @@ -4,6 +4,7 @@ #include #include +#include #include @@ -13,6 +14,7 @@ namespace DB class UserDefinedSQLObjectsStorageBase : public IUserDefinedSQLObjectsStorage { public: + explicit UserDefinedSQLObjectsStorageBase(ContextPtr global_context_); ASTPtr get(const String & object_name) const override; ASTPtr tryGet(const String & object_name) const override; @@ -64,6 +66,8 @@ protected: std::unordered_map object_name_to_create_object_map; mutable std::recursive_mutex mutex; + + ContextPtr global_context; }; } diff --git a/src/Functions/UserDefined/UserDefinedSQLObjectsZooKeeperStorage.cpp b/src/Functions/UserDefined/UserDefinedSQLObjectsZooKeeperStorage.cpp index 01e7e3995fa..12c1302a3fe 100644 --- a/src/Functions/UserDefined/UserDefinedSQLObjectsZooKeeperStorage.cpp +++ b/src/Functions/UserDefined/UserDefinedSQLObjectsZooKeeperStorage.cpp @@ -48,7 +48,7 @@ namespace UserDefinedSQLObjectsZooKeeperStorage::UserDefinedSQLObjectsZooKeeperStorage( const ContextPtr & global_context_, const String & zookeeper_path_) - : global_context{global_context_} + : UserDefinedSQLObjectsStorageBase(global_context_) , zookeeper_getter{[global_context_]() { return global_context_->getZooKeeper(); }} , zookeeper_path{zookeeper_path_} , watch_queue{std::make_shared>>(std::numeric_limits::max())} diff --git a/src/Functions/UserDefined/UserDefinedSQLObjectsZooKeeperStorage.h b/src/Functions/UserDefined/UserDefinedSQLObjectsZooKeeperStorage.h index 61002be2bfd..0aa9b198398 100644 --- a/src/Functions/UserDefined/UserDefinedSQLObjectsZooKeeperStorage.h +++ b/src/Functions/UserDefined/UserDefinedSQLObjectsZooKeeperStorage.h @@ -68,8 +68,6 @@ private: void refreshObjects(const zkutil::ZooKeeperPtr & zookeeper, UserDefinedSQLObjectType object_type); void syncObjects(const zkutil::ZooKeeperPtr & zookeeper, UserDefinedSQLObjectType object_type); - ContextPtr global_context; - zkutil::ZooKeeperCachingGetter zookeeper_getter; String zookeeper_path; std::atomic objects_loaded = false; diff --git a/src/Interpreters/NormalizeSelectWithUnionQueryVisitor.h b/src/Interpreters/NormalizeSelectWithUnionQueryVisitor.h index b2f55003da5..b642b5def91 100644 --- 
a/src/Interpreters/NormalizeSelectWithUnionQueryVisitor.h +++ b/src/Interpreters/NormalizeSelectWithUnionQueryVisitor.h @@ -4,8 +4,6 @@ #include #include -#include - namespace DB { diff --git a/tests/integration/test_replicated_user_defined_functions/test.py b/tests/integration/test_replicated_user_defined_functions/test.py index e5f6683b90b..92d86a8fd2c 100644 --- a/tests/integration/test_replicated_user_defined_functions/test.py +++ b/tests/integration/test_replicated_user_defined_functions/test.py @@ -141,6 +141,9 @@ def test_drop_if_exists(): def test_replication(): node1.query("CREATE FUNCTION f2 AS (x, y) -> x - y") + node1.query( + "CREATE FUNCTION f3 AS () -> (SELECT sum(s) FROM (SELECT 1 as s UNION ALL SELECT 1 as s))" + ) assert ( node1.query("SELECT create_query FROM system.functions WHERE name='f2'") @@ -154,7 +157,11 @@ def test_replication(): assert node1.query("SELECT f2(12,3)") == "9\n" assert node2.query("SELECT f2(12,3)") == "9\n" + assert node1.query("SELECT f3()") == "2\n" + assert node2.query("SELECT f3()") == "2\n" + node1.query("DROP FUNCTION f2") + node1.query("DROP FUNCTION f3") assert ( node1.query("SELECT create_query FROM system.functions WHERE name='f2'") == "" ) @@ -214,7 +221,9 @@ def test_reload_zookeeper(): ) # config reloads, but can still work - node1.query("CREATE FUNCTION f2 AS (x, y) -> x - y") + node1.query( + "CREATE FUNCTION f2 AS () -> (SELECT sum(s) FROM (SELECT 1 as s UNION ALL SELECT 1 as s))" + ) assert_eq_with_retry( node2, "SELECT name FROM system.functions WHERE name IN ['f1', 'f2'] ORDER BY name", @@ -269,7 +278,7 @@ def test_reload_zookeeper(): TSV(["f1", "f2", "f3"]), ) - assert node2.query("SELECT f1(12, 3), f2(12, 3), f3(12, 3)") == TSV([[15, 9, 4]]) + assert node2.query("SELECT f1(12, 3), f2(), f3(12, 3)") == TSV([[15, 2, 4]]) active_zk_connections = get_active_zk_connections() assert ( @@ -307,3 +316,13 @@ def test_start_without_zookeeper(): "CREATE FUNCTION f1 AS (x, y) -> (x + y)\n", ) node1.query("DROP FUNCTION f1") + + +def test_server_restart(): + node1.query( + "CREATE FUNCTION f1 AS () -> (SELECT sum(s) FROM (SELECT 1 as s UNION ALL SELECT 1 as s))" + ) + assert node1.query("SELECT f1()") == "2\n" + node1.restart_clickhouse() + assert node1.query("SELECT f1()") == "2\n" + node1.query("DROP FUNCTION f1") diff --git a/tests/integration/test_user_defined_object_persistence/test.py b/tests/integration/test_user_defined_object_persistence/test.py index 986438a4eed..bd491dfa195 100644 --- a/tests/integration/test_user_defined_object_persistence/test.py +++ b/tests/integration/test_user_defined_object_persistence/test.py @@ -18,20 +18,25 @@ def started_cluster(): def test_persistence(): create_function_query1 = "CREATE FUNCTION MySum1 AS (a, b) -> a + b" create_function_query2 = "CREATE FUNCTION MySum2 AS (a, b) -> MySum1(a, b) + b" + create_function_query3 = "CREATE FUNCTION MyUnion AS () -> (SELECT sum(s) FROM (SELECT 1 as s UNION ALL SELECT 1 as s))" instance.query(create_function_query1) instance.query(create_function_query2) + instance.query(create_function_query3) assert instance.query("SELECT MySum1(1,2)") == "3\n" assert instance.query("SELECT MySum2(1,2)") == "5\n" + assert instance.query("SELECT MyUnion()") == "2\n" instance.restart_clickhouse() assert instance.query("SELECT MySum1(1,2)") == "3\n" assert instance.query("SELECT MySum2(1,2)") == "5\n" + assert instance.query("SELECT MyUnion()") == "2\n" instance.query("DROP FUNCTION MySum2") instance.query("DROP FUNCTION MySum1") + instance.query("DROP FUNCTION MyUnion") 
instance.restart_clickhouse() @@ -48,3 +53,10 @@ def test_persistence(): or "Function with name 'MySum2' does not exist. In scope SELECT MySum2(1, 2)" in error_message ) + + error_message = instance.query_and_get_error("SELECT MyUnion()") + assert ( + "Unknown function MyUnion" in error_message + or "Function with name 'MyUnion' does not exist. In scope SELECT MyUnion" + in error_message + ) diff --git a/tests/queries/0_stateless/03215_udf_with_union.reference b/tests/queries/0_stateless/03215_udf_with_union.reference new file mode 100644 index 00000000000..0cfbf08886f --- /dev/null +++ b/tests/queries/0_stateless/03215_udf_with_union.reference @@ -0,0 +1 @@ +2 diff --git a/tests/queries/0_stateless/03215_udf_with_union.sql b/tests/queries/0_stateless/03215_udf_with_union.sql new file mode 100644 index 00000000000..00390c5d930 --- /dev/null +++ b/tests/queries/0_stateless/03215_udf_with_union.sql @@ -0,0 +1,14 @@ +DROP FUNCTION IF EXISTS 03215_udf_with_union; +CREATE FUNCTION 03215_udf_with_union AS () -> ( + SELECT sum(s) + FROM + ( + SELECT 1 AS s + UNION ALL + SELECT 1 AS s + ) +); + +SELECT 03215_udf_with_union(); + +DROP FUNCTION 03215_udf_with_union; From af53ed4c02ba52b3f57e97941b37a5931620d447 Mon Sep 17 00:00:00 2001 From: kssenii Date: Fri, 2 Aug 2024 12:08:49 +0200 Subject: [PATCH 596/661] Ping CI From 62f0e09ecbb226ea72b5ee8d812436ef75038e33 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Fri, 2 Aug 2024 12:17:08 +0200 Subject: [PATCH 597/661] Fix setting changes --- src/Core/SettingsChangesHistory.cpp | 264 +--------------------------- 1 file changed, 2 insertions(+), 262 deletions(-) diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index 2438202f6a3..b6ef654438e 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -57,268 +57,6 @@ String ClickHouseVersion::toString() const /// Note: please check if the key already exists to prevent duplicate entries. static std::initializer_list> settings_changes_history_initializer = { - {"24.7", {{"output_format_parquet_write_page_index", false, true, "Add a possibility to write page index into parquet files."}, - {"output_format_binary_encode_types_in_binary_format", false, false, "Added new setting to allow to write type names in binary format in RowBinaryWithNamesAndTypes output format"}, - {"input_format_binary_decode_types_in_binary_format", false, false, "Added new setting to allow to read type names in binary format in RowBinaryWithNamesAndTypes input format"}, - {"output_format_native_encode_types_in_binary_format", false, false, "Added new setting to allow to write type names in binary format in Native output format"}, - {"input_format_native_decode_types_in_binary_format", false, false, "Added new setting to allow to read type names in binary format in Native output format"}, - {"read_in_order_use_buffering", false, true, "Use buffering before merging while reading in order of primary key"}, - {"enable_named_columns_in_function_tuple", false, true, "Generate named tuples in function tuple() when all names are unique and can be treated as unquoted identifiers."}, - {"input_format_json_case_insensitive_column_matching", false, false, "Ignore case when matching JSON keys with CH columns."}, - {"optimize_trivial_insert_select", true, false, "The optimization does not make sense in many cases."}, - {"dictionary_validate_primary_key_type", false, false, "Validate primary key type for dictionaries. 
By default id type for simple layouts will be implicitly converted to UInt64."}, - {"collect_hash_table_stats_during_joins", false, true, "New setting."}, - {"max_size_to_preallocate_for_joins", 0, 100'000'000, "New setting."}, - {"input_format_orc_read_use_writer_time_zone", false, false, "Whether use the writer's time zone in ORC stripe for ORC row reader, the default ORC row reader's time zone is GMT."}, - {"lightweight_mutation_projection_mode", "throw", "throw", "When lightweight delete happens on a table with projection(s), the possible operations include throw the exception as projection exists, or drop all projection related to this table then do lightweight delete."}, - {"database_replicated_allow_heavy_create", true, false, "Long-running DDL queries (CREATE AS SELECT and POPULATE) for Replicated database engine was forbidden"}, - {"query_plan_merge_filters", false, false, "Allow to merge filters in the query plan"}, - {"azure_sdk_max_retries", 10, 10, "Maximum number of retries in azure sdk"}, - {"azure_sdk_retry_initial_backoff_ms", 10, 10, "Minimal backoff between retries in azure sdk"}, - {"azure_sdk_retry_max_backoff_ms", 1000, 1000, "Maximal backoff between retries in azure sdk"}, - {"merge_tree_min_bytes_per_task_for_remote_reading", 4194304, 2097152, "Value is unified with `filesystem_prefetch_min_bytes_for_single_read_task`"}, - {"ignore_on_cluster_for_replicated_named_collections_queries", false, false, "Ignore ON CLUSTER clause for replicated named collections management queries."}, - {"backup_restore_s3_retry_attempts", 1000,1000, "Setting for Aws::Client::RetryStrategy, Aws::Client does retries itself, 0 means no retries. It takes place only for backup/restore."}, - {"postgresql_connection_attempt_timeout", 2, 2, "Allow to control 'connect_timeout' parameter of PostgreSQL connection."}, - {"postgresql_connection_pool_retries", 2, 2, "Allow to control the number of retries in PostgreSQL connection pool."}, - {"restore_replace_external_table_functions_to_null", false, false, "New setting."}, - {"restore_replace_external_engines_to_null", false, false, "New setting."} - }}, - {"24.6", {{"materialize_skip_indexes_on_insert", true, true, "Added new setting to allow to disable materialization of skip indexes on insert"}, - {"materialize_statistics_on_insert", true, true, "Added new setting to allow to disable materialization of statistics on insert"}, - {"input_format_parquet_use_native_reader", false, false, "When reading Parquet files, to use native reader instead of arrow reader."}, - {"hdfs_throw_on_zero_files_match", false, false, "Allow to throw an error when ListObjects request cannot match any files in HDFS engine instead of empty query result"}, - {"azure_throw_on_zero_files_match", false, false, "Allow to throw an error when ListObjects request cannot match any files in AzureBlobStorage engine instead of empty query result"}, - {"s3_validate_request_settings", true, true, "Allow to disable S3 request settings validation"}, - {"allow_experimental_full_text_index", false, false, "Enable experimental full-text index"}, - {"azure_skip_empty_files", false, false, "Allow to skip empty files in azure table engine"}, - {"hdfs_ignore_file_doesnt_exist", false, false, "Allow to return 0 rows when the requested files don't exist instead of throwing an exception in HDFS table engine"}, - {"azure_ignore_file_doesnt_exist", false, false, "Allow to return 0 rows when the requested files don't exist instead of throwing an exception in AzureBlobStorage table engine"}, - 
{"s3_ignore_file_doesnt_exist", false, false, "Allow to return 0 rows when the requested files don't exist instead of throwing an exception in S3 table engine"}, - {"s3_max_part_number", 10000, 10000, "Maximum part number number for s3 upload part"}, - {"s3_max_single_operation_copy_size", 32 * 1024 * 1024, 32 * 1024 * 1024, "Maximum size for a single copy operation in s3"}, - {"input_format_parquet_max_block_size", 8192, DEFAULT_BLOCK_SIZE, "Increase block size for parquet reader."}, - {"input_format_parquet_prefer_block_bytes", 0, DEFAULT_BLOCK_SIZE * 256, "Average block bytes output by parquet reader."}, - {"enable_blob_storage_log", true, true, "Write information about blob storage operations to system.blob_storage_log table"}, - {"allow_deprecated_snowflake_conversion_functions", true, false, "Disabled deprecated functions snowflakeToDateTime[64] and dateTime[64]ToSnowflake."}, - {"allow_statistic_optimize", false, false, "Old setting which popped up here being renamed."}, - {"allow_experimental_statistic", false, false, "Old setting which popped up here being renamed."}, - {"allow_statistics_optimize", false, false, "The setting was renamed. The previous name is `allow_statistic_optimize`."}, - {"allow_experimental_statistics", false, false, "The setting was renamed. The previous name is `allow_experimental_statistic`."}, - {"enable_vertical_final", false, true, "Enable vertical final by default again after fixing bug"}, - {"parallel_replicas_custom_key_range_lower", 0, 0, "Add settings to control the range filter when using parallel replicas with dynamic shards"}, - {"parallel_replicas_custom_key_range_upper", 0, 0, "Add settings to control the range filter when using parallel replicas with dynamic shards. A value of 0 disables the upper limit"}, - {"output_format_pretty_display_footer_column_names", 0, 1, "Add a setting to display column names in the footer if there are many rows. Threshold value is controlled by output_format_pretty_display_footer_column_names_min_rows."}, - {"output_format_pretty_display_footer_column_names_min_rows", 0, 50, "Add a setting to control the threshold value for setting output_format_pretty_display_footer_column_names_min_rows. Default 50."}, - {"output_format_csv_serialize_tuple_into_separate_columns", true, true, "A new way of how interpret tuples in CSV format was added."}, - {"input_format_csv_deserialize_separate_columns_into_tuple", true, true, "A new way of how interpret tuples in CSV format was added."}, - {"input_format_csv_try_infer_strings_from_quoted_tuples", true, true, "A new way of how interpret tuples in CSV format was added."}, - }}, - {"24.5", {{"allow_deprecated_error_prone_window_functions", true, false, "Allow usage of deprecated error prone window functions (neighbor, runningAccumulate, runningDifferenceStartingWithFirstValue, runningDifference)"}, - {"allow_experimental_join_condition", false, false, "Support join with inequal conditions which involve columns from both left and right table. e.g. t1.y < t2.y."}, - {"input_format_tsv_crlf_end_of_line", false, false, "Enables reading of CRLF line endings with TSV formats"}, - {"output_format_parquet_use_custom_encoder", false, true, "Enable custom Parquet encoder."}, - {"cross_join_min_rows_to_compress", 0, 10000000, "Minimal count of rows to compress block in CROSS JOIN. Zero value means - disable this threshold. 
This block is compressed when any of the two thresholds (by rows or by bytes) are reached."}, - {"cross_join_min_bytes_to_compress", 0, 1_GiB, "Minimal size of block to compress in CROSS JOIN. Zero value means - disable this threshold. This block is compressed when any of the two thresholds (by rows or by bytes) are reached."}, - {"http_max_chunk_size", 0, 0, "Internal limitation"}, - {"prefer_external_sort_block_bytes", 0, DEFAULT_BLOCK_SIZE * 256, "Prefer maximum block bytes for external sort, reduce the memory usage during merging."}, - {"input_format_force_null_for_omitted_fields", false, false, "Disable type-defaults for omitted fields when needed"}, - {"cast_string_to_dynamic_use_inference", false, false, "Add setting to allow converting String to Dynamic through parsing"}, - {"allow_experimental_dynamic_type", false, false, "Add new experimental Dynamic type"}, - {"azure_max_blocks_in_multipart_upload", 50000, 50000, "Maximum number of blocks in multipart upload for Azure."}, - {"allow_archive_path_syntax", false, true, "Added new setting to allow disabling archive path syntax."}, - }}, - {"24.4", {{"input_format_json_throw_on_bad_escape_sequence", true, true, "Allow to save JSON strings with bad escape sequences"}, - {"max_parsing_threads", 0, 0, "Add a separate setting to control number of threads in parallel parsing from files"}, - {"ignore_drop_queries_probability", 0, 0, "Allow to ignore drop queries in server with specified probability for testing purposes"}, - {"lightweight_deletes_sync", 2, 2, "The same as 'mutation_sync', but controls only execution of lightweight deletes"}, - {"query_cache_system_table_handling", "save", "throw", "The query cache no longer caches results of queries against system tables"}, - {"input_format_json_ignore_unnecessary_fields", false, true, "Ignore unnecessary fields and not parse them. 
Enabling this may not throw exceptions on json strings of invalid format or with duplicated fields"}, - {"input_format_hive_text_allow_variable_number_of_columns", false, true, "Ignore extra columns in Hive Text input (if file has more columns than expected) and treat missing fields in Hive Text input as default values."}, - {"allow_experimental_database_replicated", false, true, "Database engine Replicated is now in Beta stage"}, - {"temporary_data_in_cache_reserve_space_wait_lock_timeout_milliseconds", (10 * 60 * 1000), (10 * 60 * 1000), "Wait time to lock cache for sapce reservation in temporary data in filesystem cache"}, - {"optimize_rewrite_sum_if_to_count_if", false, true, "Only available for the analyzer, where it works correctly"}, - {"azure_allow_parallel_part_upload", "true", "true", "Use multiple threads for azure multipart upload."}, - {"max_recursive_cte_evaluation_depth", DBMS_RECURSIVE_CTE_MAX_EVALUATION_DEPTH, DBMS_RECURSIVE_CTE_MAX_EVALUATION_DEPTH, "Maximum limit on recursive CTE evaluation depth"}, - {"query_plan_convert_outer_join_to_inner_join", false, true, "Allow to convert OUTER JOIN to INNER JOIN if filter after JOIN always filters default values"}, - }}, - {"24.3", {{"s3_connect_timeout_ms", 1000, 1000, "Introduce new dedicated setting for s3 connection timeout"}, - {"allow_experimental_shared_merge_tree", false, true, "The setting is obsolete"}, - {"use_page_cache_for_disks_without_file_cache", false, false, "Added userspace page cache"}, - {"read_from_page_cache_if_exists_otherwise_bypass_cache", false, false, "Added userspace page cache"}, - {"page_cache_inject_eviction", false, false, "Added userspace page cache"}, - {"default_table_engine", "None", "MergeTree", "Set default table engine to MergeTree for better usability"}, - {"input_format_json_use_string_type_for_ambiguous_paths_in_named_tuples_inference_from_objects", false, false, "Allow to use String type for ambiguous paths during named tuple inference from JSON objects"}, - {"traverse_shadow_remote_data_paths", false, false, "Traverse shadow directory when query system.remote_data_paths."}, - {"throw_if_deduplication_in_dependent_materialized_views_enabled_with_async_insert", false, true, "Deduplication in dependent materialized view cannot work together with async inserts."}, - {"parallel_replicas_allow_in_with_subquery", false, true, "If true, subquery for IN will be executed on every follower replica"}, - {"log_processors_profiles", false, true, "Enable by default"}, - {"function_locate_has_mysql_compatible_argument_order", false, true, "Increase compatibility with MySQL's locate function."}, - {"allow_suspicious_primary_key", true, false, "Forbid suspicious PRIMARY KEY/ORDER BY for MergeTree (i.e. 
SimpleAggregateFunction)"}, - {"filesystem_cache_reserve_space_wait_lock_timeout_milliseconds", 1000, 1000, "Wait time to lock cache for sapce reservation in filesystem cache"}, - {"max_parser_backtracks", 0, 1000000, "Limiting the complexity of parsing"}, - {"analyzer_compatibility_join_using_top_level_identifier", false, false, "Force to resolve identifier in JOIN USING from projection"}, - {"distributed_insert_skip_read_only_replicas", false, false, "If true, INSERT into Distributed will skip read-only replicas"}, - {"keeper_max_retries", 10, 10, "Max retries for general keeper operations"}, - {"keeper_retry_initial_backoff_ms", 100, 100, "Initial backoff timeout for general keeper operations"}, - {"keeper_retry_max_backoff_ms", 5000, 5000, "Max backoff timeout for general keeper operations"}, - {"s3queue_allow_experimental_sharded_mode", false, false, "Enable experimental sharded mode of S3Queue table engine. It is experimental because it will be rewritten"}, - {"allow_experimental_analyzer", false, true, "Enable analyzer and planner by default."}, - {"merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability", 0.0, 0.0, "For testing of `PartsSplitter` - split read ranges into intersecting and non intersecting every time you read from MergeTree with the specified probability."}, - {"allow_get_client_http_header", false, false, "Introduced a new function."}, - {"output_format_pretty_row_numbers", false, true, "It is better for usability."}, - {"output_format_pretty_max_value_width_apply_for_single_value", true, false, "Single values in Pretty formats won't be cut."}, - {"output_format_parquet_string_as_string", false, true, "ClickHouse allows arbitrary binary data in the String data type, which is typically UTF-8. Parquet/ORC/Arrow Strings only support UTF-8. That's why you can choose which Arrow's data type to use for the ClickHouse String data type - String or Binary. While Binary would be more correct and compatible, using String by default will correspond to user expectations in most cases."}, - {"output_format_orc_string_as_string", false, true, "ClickHouse allows arbitrary binary data in the String data type, which is typically UTF-8. Parquet/ORC/Arrow Strings only support UTF-8. That's why you can choose which Arrow's data type to use for the ClickHouse String data type - String or Binary. While Binary would be more correct and compatible, using String by default will correspond to user expectations in most cases."}, - {"output_format_arrow_string_as_string", false, true, "ClickHouse allows arbitrary binary data in the String data type, which is typically UTF-8. Parquet/ORC/Arrow Strings only support UTF-8. That's why you can choose which Arrow's data type to use for the ClickHouse String data type - String or Binary. While Binary would be more correct and compatible, using String by default will correspond to user expectations in most cases."}, - {"output_format_parquet_compression_method", "lz4", "zstd", "Parquet/ORC/Arrow support many compression methods, including lz4 and zstd. ClickHouse supports each and every compression method. Some inferior tools, such as 'duckdb', lack support for the faster `lz4` compression method, that's why we set zstd by default."}, - {"output_format_orc_compression_method", "lz4", "zstd", "Parquet/ORC/Arrow support many compression methods, including lz4 and zstd. ClickHouse supports each and every compression method. 
Some inferior tools, such as 'duckdb', lack support for the faster `lz4` compression method, that's why we set zstd by default."}, - {"output_format_pretty_highlight_digit_groups", false, true, "If enabled and if output is a terminal, highlight every digit corresponding to the number of thousands, millions, etc. with underline."}, - {"geo_distance_returns_float64_on_float64_arguments", false, true, "Increase the default precision."}, - {"azure_max_inflight_parts_for_one_file", 20, 20, "The maximum number of a concurrent loaded parts in multipart upload request. 0 means unlimited."}, - {"azure_strict_upload_part_size", 0, 0, "The exact size of part to upload during multipart upload to Azure blob storage."}, - {"azure_min_upload_part_size", 16*1024*1024, 16*1024*1024, "The minimum size of part to upload during multipart upload to Azure blob storage."}, - {"azure_max_upload_part_size", 5ull*1024*1024*1024, 5ull*1024*1024*1024, "The maximum size of part to upload during multipart upload to Azure blob storage."}, - {"azure_upload_part_size_multiply_factor", 2, 2, "Multiply azure_min_upload_part_size by this factor each time azure_multiply_parts_count_threshold parts were uploaded from a single write to Azure blob storage."}, - {"azure_upload_part_size_multiply_parts_count_threshold", 500, 500, "Each time this number of parts was uploaded to Azure blob storage, azure_min_upload_part_size is multiplied by azure_upload_part_size_multiply_factor."}, - {"output_format_csv_serialize_tuple_into_separate_columns", true, true, "A new way of how interpret tuples in CSV format was added."}, - {"input_format_csv_deserialize_separate_columns_into_tuple", true, true, "A new way of how interpret tuples in CSV format was added."}, - {"input_format_csv_try_infer_strings_from_quoted_tuples", true, true, "A new way of how interpret tuples in CSV format was added."}, - }}, - {"24.2", {{"allow_suspicious_variant_types", true, false, "Don't allow creating Variant type with suspicious variants by default"}, - {"validate_experimental_and_suspicious_types_inside_nested_types", false, true, "Validate usage of experimental and suspicious types inside nested types"}, - {"output_format_values_escape_quote_with_quote", false, false, "If true escape ' with '', otherwise quoted with \\'"}, - {"output_format_pretty_single_large_number_tip_threshold", 0, 1'000'000, "Print a readable number tip on the right side of the table if the block consists of a single number which exceeds this value (except 0)"}, - {"input_format_try_infer_exponent_floats", true, false, "Don't infer floats in exponential notation by default"}, - {"query_plan_optimize_prewhere", true, true, "Allow to push down filter to PREWHERE expression for supported storages"}, - {"async_insert_max_data_size", 1000000, 10485760, "The previous value appeared to be too small."}, - {"async_insert_poll_timeout_ms", 10, 10, "Timeout in milliseconds for polling data from asynchronous insert queue"}, - {"async_insert_use_adaptive_busy_timeout", false, true, "Use adaptive asynchronous insert timeout"}, - {"async_insert_busy_timeout_min_ms", 50, 50, "The minimum value of the asynchronous insert timeout in milliseconds; it also serves as the initial value, which may be increased later by the adaptive algorithm"}, - {"async_insert_busy_timeout_max_ms", 200, 200, "The minimum value of the asynchronous insert timeout in milliseconds; async_insert_busy_timeout_ms is aliased to async_insert_busy_timeout_max_ms"}, - {"async_insert_busy_timeout_increase_rate", 0.2, 0.2, "The 
exponential growth rate at which the adaptive asynchronous insert timeout increases"}, - {"async_insert_busy_timeout_decrease_rate", 0.2, 0.2, "The exponential growth rate at which the adaptive asynchronous insert timeout decreases"}, - {"format_template_row_format", "", "", "Template row format string can be set directly in query"}, - {"format_template_resultset_format", "", "", "Template result set format string can be set in query"}, - {"split_parts_ranges_into_intersecting_and_non_intersecting_final", true, true, "Allow to split parts ranges into intersecting and non intersecting during FINAL optimization"}, - {"split_intersecting_parts_ranges_into_layers_final", true, true, "Allow to split intersecting parts ranges into layers during FINAL optimization"}, - {"azure_max_single_part_copy_size", 256*1024*1024, 256*1024*1024, "The maximum size of object to copy using single part copy to Azure blob storage."}, - {"min_external_table_block_size_rows", DEFAULT_INSERT_BLOCK_SIZE, DEFAULT_INSERT_BLOCK_SIZE, "Squash blocks passed to external table to specified size in rows, if blocks are not big enough"}, - {"min_external_table_block_size_bytes", DEFAULT_INSERT_BLOCK_SIZE * 256, DEFAULT_INSERT_BLOCK_SIZE * 256, "Squash blocks passed to external table to specified size in bytes, if blocks are not big enough."}, - {"parallel_replicas_prefer_local_join", true, true, "If true, and JOIN can be executed with parallel replicas algorithm, and all storages of right JOIN part are *MergeTree, local JOIN will be used instead of GLOBAL JOIN."}, - {"optimize_time_filter_with_preimage", true, true, "Optimize Date and DateTime predicates by converting functions into equivalent comparisons without conversions (e.g. toYear(col) = 2023 -> col >= '2023-01-01' AND col <= '2023-12-31')"}, - {"extract_key_value_pairs_max_pairs_per_row", 0, 0, "Max number of pairs that can be produced by the `extractKeyValuePairs` function. 
Used as a safeguard against consuming too much memory."}, - {"default_view_definer", "CURRENT_USER", "CURRENT_USER", "Allows to set default `DEFINER` option while creating a view"}, - {"default_materialized_view_sql_security", "DEFINER", "DEFINER", "Allows to set a default value for SQL SECURITY option when creating a materialized view"}, - {"default_normal_view_sql_security", "INVOKER", "INVOKER", "Allows to set default `SQL SECURITY` option while creating a normal view"}, - {"mysql_map_string_to_text_in_show_columns", false, true, "Reduce the configuration effort to connect ClickHouse with BI tools."}, - {"mysql_map_fixed_string_to_text_in_show_columns", false, true, "Reduce the configuration effort to connect ClickHouse with BI tools."}, - }}, - {"24.1", {{"print_pretty_type_names", false, true, "Better user experience."}, - {"input_format_json_read_bools_as_strings", false, true, "Allow to read bools as strings in JSON formats by default"}, - {"output_format_arrow_use_signed_indexes_for_dictionary", false, true, "Use signed indexes type for Arrow dictionaries by default as it's recommended"}, - {"allow_experimental_variant_type", false, false, "Add new experimental Variant type"}, - {"use_variant_as_common_type", false, false, "Allow to use Variant in if/multiIf if there is no common type"}, - {"output_format_arrow_use_64_bit_indexes_for_dictionary", false, false, "Allow to use 64 bit indexes type in Arrow dictionaries"}, - {"parallel_replicas_mark_segment_size", 128, 128, "Add new setting to control segment size in new parallel replicas coordinator implementation"}, - {"ignore_materialized_views_with_dropped_target_table", false, false, "Add new setting to allow to ignore materialized views with dropped target table"}, - {"output_format_compression_level", 3, 3, "Allow to change compression level in the query output"}, - {"output_format_compression_zstd_window_log", 0, 0, "Allow to change zstd window log in the query output when zstd compression is used"}, - {"enable_zstd_qat_codec", false, false, "Add new ZSTD_QAT codec"}, - {"enable_vertical_final", false, true, "Use vertical final by default"}, - {"output_format_arrow_use_64_bit_indexes_for_dictionary", false, false, "Allow to use 64 bit indexes type in Arrow dictionaries"}, - {"max_rows_in_set_to_optimize_join", 100000, 0, "Disable join optimization as it prevents from read in order optimization"}, - {"output_format_pretty_color", true, "auto", "Setting is changed to allow also for auto value, disabling ANSI escapes if output is not a tty"}, - {"function_visible_width_behavior", 0, 1, "We changed the default behavior of `visibleWidth` to be more precise"}, - {"max_estimated_execution_time", 0, 0, "Separate max_execution_time and max_estimated_execution_time"}, - {"iceberg_engine_ignore_schema_evolution", false, false, "Allow to ignore schema evolution in Iceberg table engine"}, - {"optimize_injective_functions_in_group_by", false, true, "Replace injective functions by it's arguments in GROUP BY section in analyzer"}, - {"update_insert_deduplication_token_in_dependent_materialized_views", false, false, "Allow to update insert deduplication token with table identifier during insert in dependent materialized views"}, - {"azure_max_unexpected_write_error_retries", 4, 4, "The maximum number of retries in case of unexpected errors during Azure blob storage write"}, - {"split_parts_ranges_into_intersecting_and_non_intersecting_final", false, true, "Allow to split parts ranges into intersecting and non intersecting during FINAL 
optimization"}, - {"split_intersecting_parts_ranges_into_layers_final", true, true, "Allow to split intersecting parts ranges into layers during FINAL optimization"}}}, - {"23.12", {{"allow_suspicious_ttl_expressions", true, false, "It is a new setting, and in previous versions the behavior was equivalent to allowing."}, - {"input_format_parquet_allow_missing_columns", false, true, "Allow missing columns in Parquet files by default"}, - {"input_format_orc_allow_missing_columns", false, true, "Allow missing columns in ORC files by default"}, - {"input_format_arrow_allow_missing_columns", false, true, "Allow missing columns in Arrow files by default"}}}, - {"23.11", {{"parsedatetime_parse_without_leading_zeros", false, true, "Improved compatibility with MySQL DATE_FORMAT/STR_TO_DATE"}}}, - {"23.9", {{"optimize_group_by_constant_keys", false, true, "Optimize group by constant keys by default"}, - {"input_format_json_try_infer_named_tuples_from_objects", false, true, "Try to infer named Tuples from JSON objects by default"}, - {"input_format_json_read_numbers_as_strings", false, true, "Allow to read numbers as strings in JSON formats by default"}, - {"input_format_json_read_arrays_as_strings", false, true, "Allow to read arrays as strings in JSON formats by default"}, - {"input_format_json_infer_incomplete_types_as_strings", false, true, "Allow to infer incomplete types as Strings in JSON formats by default"}, - {"input_format_json_try_infer_numbers_from_strings", true, false, "Don't infer numbers from strings in JSON formats by default to prevent possible parsing errors"}, - {"http_write_exception_in_output_format", false, true, "Output valid JSON/XML on exception in HTTP streaming."}}}, - {"23.8", {{"rewrite_count_distinct_if_with_count_distinct_implementation", false, true, "Rewrite countDistinctIf with count_distinct_implementation configuration"}}}, - {"23.7", {{"function_sleep_max_microseconds_per_block", 0, 3000000, "In previous versions, the maximum sleep time of 3 seconds was applied only for `sleep`, but not for `sleepEachRow` function. In the new version, we introduce this setting. If you set compatibility with the previous versions, we will disable the limit altogether."}}}, - {"23.6", {{"http_send_timeout", 180, 30, "3 minutes seems crazy long. Note that this is timeout for a single network write call, not for the whole upload operation."}, - {"http_receive_timeout", 180, 30, "See http_send_timeout."}}}, - {"23.5", {{"input_format_parquet_preserve_order", true, false, "Allow Parquet reader to reorder rows for better parallelism."}, - {"parallelize_output_from_storages", false, true, "Allow parallelism when executing queries that read from file/url/s3/etc. This may reorder rows."}, - {"use_with_fill_by_sorting_prefix", false, true, "Columns preceding WITH FILL columns in ORDER BY clause form sorting prefix. 
Rows with different values in sorting prefix are filled independently"}, - {"output_format_parquet_compliant_nested_types", false, true, "Change an internal field name in output Parquet file schema."}}}, - {"23.4", {{"allow_suspicious_indices", true, false, "If true, index can defined with identical expressions"}, - {"allow_nonconst_timezone_arguments", true, false, "Allow non-const timezone arguments in certain time-related functions like toTimeZone(), fromUnixTimestamp*(), snowflakeToDateTime*()."}, - {"connect_timeout_with_failover_ms", 50, 1000, "Increase default connect timeout because of async connect"}, - {"connect_timeout_with_failover_secure_ms", 100, 1000, "Increase default secure connect timeout because of async connect"}, - {"hedged_connection_timeout_ms", 100, 50, "Start new connection in hedged requests after 50 ms instead of 100 to correspond with previous connect timeout"}, - {"formatdatetime_f_prints_single_zero", true, false, "Improved compatibility with MySQL DATE_FORMAT()/STR_TO_DATE()"}, - {"formatdatetime_parsedatetime_m_is_month_name", false, true, "Improved compatibility with MySQL DATE_FORMAT/STR_TO_DATE"}}}, - {"23.3", {{"output_format_parquet_version", "1.0", "2.latest", "Use latest Parquet format version for output format"}, - {"input_format_json_ignore_unknown_keys_in_named_tuple", false, true, "Improve parsing JSON objects as named tuples"}, - {"input_format_native_allow_types_conversion", false, true, "Allow types conversion in Native input forma"}, - {"output_format_arrow_compression_method", "none", "lz4_frame", "Use lz4 compression in Arrow output format by default"}, - {"output_format_parquet_compression_method", "snappy", "lz4", "Use lz4 compression in Parquet output format by default"}, - {"output_format_orc_compression_method", "none", "lz4_frame", "Use lz4 compression in ORC output format by default"}, - {"async_query_sending_for_remote", false, true, "Create connections and send query async across shards"}}}, - {"23.2", {{"output_format_parquet_fixed_string_as_fixed_byte_array", false, true, "Use Parquet FIXED_LENGTH_BYTE_ARRAY type for FixedString by default"}, - {"output_format_arrow_fixed_string_as_fixed_byte_array", false, true, "Use Arrow FIXED_SIZE_BINARY type for FixedString by default"}, - {"query_plan_remove_redundant_distinct", false, true, "Remove redundant Distinct step in query plan"}, - {"optimize_duplicate_order_by_and_distinct", true, false, "Remove duplicate ORDER BY and DISTINCT if it's possible"}, - {"insert_keeper_max_retries", 0, 20, "Enable reconnections to Keeper on INSERT, improve reliability"}}}, - {"23.1", {{"input_format_json_read_objects_as_strings", 0, 1, "Enable reading nested json objects as strings while object type is experimental"}, - {"input_format_json_defaults_for_missing_elements_in_named_tuple", false, true, "Allow missing elements in JSON objects while reading named tuples by default"}, - {"input_format_csv_detect_header", false, true, "Detect header in CSV format by default"}, - {"input_format_tsv_detect_header", false, true, "Detect header in TSV format by default"}, - {"input_format_custom_detect_header", false, true, "Detect header in CustomSeparated format by default"}, - {"query_plan_remove_redundant_sorting", false, true, "Remove redundant sorting in query plan. 
For example, sorting steps related to ORDER BY clauses in subqueries"}}}, - {"22.12", {{"max_size_to_preallocate_for_aggregation", 10'000'000, 100'000'000, "This optimizes performance"}, - {"query_plan_aggregation_in_order", 0, 1, "Enable some refactoring around query plan"}, - {"format_binary_max_string_size", 0, 1_GiB, "Prevent allocating large amount of memory"}}}, - {"22.11", {{"use_structure_from_insertion_table_in_table_functions", 0, 2, "Improve using structure from insertion table in table functions"}}}, - {"22.9", {{"force_grouping_standard_compatibility", false, true, "Make GROUPING function output the same as in SQL standard and other DBMS"}}}, - {"22.7", {{"cross_to_inner_join_rewrite", 1, 2, "Force rewrite comma join to inner"}, - {"enable_positional_arguments", false, true, "Enable positional arguments feature by default"}, - {"format_csv_allow_single_quotes", true, false, "Most tools don't treat single quote in CSV specially, don't do it by default too"}}}, - {"22.6", {{"output_format_json_named_tuples_as_objects", false, true, "Allow to serialize named tuples as JSON objects in JSON formats by default"}, - {"input_format_skip_unknown_fields", false, true, "Optimize reading subset of columns for some input formats"}}}, - {"22.5", {{"memory_overcommit_ratio_denominator", 0, 1073741824, "Enable memory overcommit feature by default"}, - {"memory_overcommit_ratio_denominator_for_user", 0, 1073741824, "Enable memory overcommit feature by default"}}}, - {"22.4", {{"allow_settings_after_format_in_insert", true, false, "Do not allow SETTINGS after FORMAT for INSERT queries because ClickHouse interpret SETTINGS as some values, which is misleading"}}}, - {"22.3", {{"cast_ipv4_ipv6_default_on_conversion_error", true, false, "Make functions cast(value, 'IPv4') and cast(value, 'IPv6') behave same as toIPv4 and toIPv6 functions"}}}, - {"21.12", {{"stream_like_engine_allow_direct_select", true, false, "Do not allow direct select for Kafka/RabbitMQ/FileLog by default"}}}, - {"21.9", {{"output_format_decimal_trailing_zeros", true, false, "Do not output trailing zeros in text representation of Decimal types by default for better looking output"}, - {"use_hedged_requests", false, true, "Enable Hedged Requests feature by default"}}}, - {"21.7", {{"legacy_column_name_of_tuple_literal", true, false, "Add this setting only for compatibility reasons. It makes sense to set to 'true', while doing rolling update of cluster from version lower than 21.7 to higher"}}}, - {"21.5", {{"async_socket_for_remote", false, true, "Fix all problems and turn on asynchronous reads from socket for remote queries by default again"}}}, - {"21.3", {{"async_socket_for_remote", true, false, "Turn off asynchronous reads from socket for remote queries because of some problems"}, - {"optimize_normalize_count_variants", false, true, "Rewrite aggregate functions that semantically equals to count() as count() by default"}, - {"normalize_function_names", false, true, "Normalize function names to their canonical names, this was needed for projection query routing"}}}, - {"21.2", {{"enable_global_with_statement", false, true, "Propagate WITH statements to UNION queries and all subqueries by default"}}}, - {"21.1", {{"insert_quorum_parallel", false, true, "Use parallel quorum inserts by default. 
It is significantly more convenient to use than sequential quorum inserts"}, - {"input_format_null_as_default", false, true, "Allow to insert NULL as default for input formats by default"}, - {"optimize_on_insert", false, true, "Enable data optimization on INSERT by default for better user experience"}, - {"use_compact_format_in_distributed_parts_names", false, true, "Use compact format for async INSERT into Distributed tables by default"}}}, - {"20.10", {{"format_regexp_escaping_rule", "Escaped", "Raw", "Use Raw as default escaping rule for Regexp format to male the behaviour more like to what users expect"}}}, - {"20.7", {{"show_table_uuid_in_table_create_query_if_not_nil", true, false, "Stop showing UID of the table in its CREATE query for Engine=Atomic"}}}, - {"20.5", {{"input_format_with_names_use_header", false, true, "Enable using header with names for formats with WithNames/WithNamesAndTypes suffixes"}, - {"allow_suspicious_codecs", true, false, "Don't allow to specify meaningless compression codecs"}}}, - {"20.4", {{"validate_polygons", false, true, "Throw exception if polygon is invalid in function pointInPolygon by default instead of returning possibly wrong results"}}}, - {"19.18", {{"enable_scalar_subquery_optimization", false, true, "Prevent scalar subqueries from (de)serializing large scalar values and possibly avoid running the same subquery more than once"}}}, - {"19.14", {{"any_join_distinct_right_table_keys", true, false, "Disable ANY RIGHT and ANY FULL JOINs by default to avoid inconsistency"}}}, - {"19.12", {{"input_format_defaults_for_omitted_fields", false, true, "Enable calculation of complex default expressions for omitted fields for some input formats, because it should be the expected behaviour"}}}, - {"19.5", {{"max_partitions_per_insert_block", 0, 100, "Add a limit for the number of partitions in one block"}}}, - {"18.12.17", {{"enable_optimize_predicate_expression", 0, 1, "Optimize predicates to subqueries by default"}}}, {"24.12", { } @@ -338,6 +76,7 @@ static std::initializer_list Date: Mon, 29 Jul 2024 12:54:36 +0000 Subject: [PATCH 598/661] Trying to fix test_cache_evicted_by_temporary_data and print debug info --- .../config.d/storage_configuration.xml | 6 +-- .../test_temporary_data_in_cache/test.py | 44 ++++++++++++------- 2 files changed, 31 insertions(+), 19 deletions(-) diff --git a/tests/integration/test_temporary_data_in_cache/configs/config.d/storage_configuration.xml b/tests/integration/test_temporary_data_in_cache/configs/config.d/storage_configuration.xml index 5a087d03266..107864fde0c 100644 --- a/tests/integration/test_temporary_data_in_cache/configs/config.d/storage_configuration.xml +++ b/tests/integration/test_temporary_data_in_cache/configs/config.d/storage_configuration.xml @@ -10,9 +10,9 @@ cache local_disk /tiny_local_cache/ - 10M - 1M - 1M + 12M + 100K + 100K 1 diff --git a/tests/integration/test_temporary_data_in_cache/test.py b/tests/integration/test_temporary_data_in_cache/test.py index cab134dcce2..abdfb5f4064 100644 --- a/tests/integration/test_temporary_data_in_cache/test.py +++ b/tests/integration/test_temporary_data_in_cache/test.py @@ -7,6 +7,9 @@ import fnmatch from helpers.cluster import ClickHouseCluster from helpers.client import QueryRuntimeException + +MB = 1024 * 1024 + cluster = ClickHouseCluster(__file__) node = cluster.add_instance( @@ -36,15 +39,28 @@ def test_cache_evicted_by_temporary_data(start_cluster): q("SELECT sum(size) FROM system.filesystem_cache").strip() ) - assert get_cache_size() == 0 + def 
dump_debug_info(): + return "\n".join( + [ + ">>> filesystem_cache <<<", + q("SELECT * FROM system.filesystem_cache FORMAT Vertical"), + ">>> remote_data_paths <<<", + q("SELECT * FROM system.remote_data_paths FORMAT Vertical"), + ">>> tiny_local_cache_local_disk <<<", + q( + "SELECT * FROM system.disks WHERE name = 'tiny_local_cache_local_disk' FORMAT Vertical" + ), + ] + ) - assert get_free_space() > 8 * 1024 * 1024 + assert get_cache_size() == 0, dump_debug_info() + assert get_free_space() > 8 * MB, dump_debug_info() # Codec is NONE to make cache size predictable q( - "CREATE TABLE t1 (x UInt64 CODEC(NONE), y UInt64 CODEC(NONE)) ENGINE = MergeTree ORDER BY x SETTINGS storage_policy = 'tiny_local_cache'" + "CREATE TABLE t1 (x UInt64 CODEC(NONE)) ENGINE = MergeTree ORDER BY x SETTINGS storage_policy = 'tiny_local_cache'" ) - q("INSERT INTO t1 SELECT number, number FROM numbers(1024 * 1024)") + q("INSERT INTO t1 SELECT number FROM numbers(1024 * 1024)") # To be sure that nothing is reading the cache and entries for t1 can be evited q("OPTIMIZE TABLE t1 FINAL") @@ -54,11 +70,11 @@ def test_cache_evicted_by_temporary_data(start_cluster): q("SELECT sum(x) FROM t1") cache_size_with_t1 = get_cache_size() - assert cache_size_with_t1 > 8 * 1024 * 1024 + assert cache_size_with_t1 > 8 * MB, dump_debug_info() # Almost all disk space is occupied by t1 cache free_space_with_t1 = get_free_space() - assert free_space_with_t1 < 4 * 1024 * 1024 + assert free_space_with_t1 < 4 * MB, dump_debug_info() # Try to sort the table, but fail because of lack of disk space with pytest.raises(QueryRuntimeException) as exc: @@ -76,31 +92,27 @@ def test_cache_evicted_by_temporary_data(start_cluster): # Some data evicted from cache by temporary data cache_size_after_eviction = get_cache_size() - assert cache_size_after_eviction < cache_size_with_t1 + assert cache_size_after_eviction < cache_size_with_t1, dump_debug_info() # Disk space freed, at least 3 MB, because temporary data tried to write 4 MB - assert get_free_space() > free_space_with_t1 + 3 * 1024 * 1024 + assert get_free_space() > free_space_with_t1 + 3 * MB, dump_debug_info() # Read some data to fill the cache again - q("SELECT avg(y) FROM t1") + q("SELECT avg(x) FROM t1") cache_size_with_t1 = get_cache_size() - assert cache_size_with_t1 > 8 * 1024 * 1024, q( - "SELECT * FROM system.filesystem_cache FORMAT Vertical" - ) + assert cache_size_with_t1 > 8 * MB, dump_debug_info() # Almost all disk space is occupied by t1 cache free_space_with_t1 = get_free_space() - assert free_space_with_t1 < 4 * 1024 * 1024, q( - "SELECT * FROM system.disks WHERE name = 'tiny_local_cache_local_disk' FORMAT Vertical" - ) + assert free_space_with_t1 < 4 * MB, dump_debug_info() node.http_query( "SELECT randomPrintableASCII(1024) FROM numbers(8 * 1024) FORMAT TSV", params={"buffer_size": 0, "wait_end_of_query": 1}, ) - assert get_free_space() > free_space_with_t1 + 3 * 1024 * 1024 + assert get_free_space() > free_space_with_t1 + 3 * MB, dump_debug_info() # not enough space for buffering 32 MB with pytest.raises(Exception) as exc: From 02e48436057e45a884a1381e1c9cda9e1fe7de17 Mon Sep 17 00:00:00 2001 From: vdimir Date: Tue, 30 Jul 2024 09:30:39 +0000 Subject: [PATCH 599/661] test_cache_evicted_by_temporary_data drop cache --- .../test_temporary_data_in_cache/test.py | 30 ++++++++++--------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/tests/integration/test_temporary_data_in_cache/test.py b/tests/integration/test_temporary_data_in_cache/test.py index 
abdfb5f4064..87192a20975 100644 --- a/tests/integration/test_temporary_data_in_cache/test.py +++ b/tests/integration/test_temporary_data_in_cache/test.py @@ -39,19 +39,21 @@ def test_cache_evicted_by_temporary_data(start_cluster): q("SELECT sum(size) FROM system.filesystem_cache").strip() ) - def dump_debug_info(): - return "\n".join( - [ - ">>> filesystem_cache <<<", - q("SELECT * FROM system.filesystem_cache FORMAT Vertical"), - ">>> remote_data_paths <<<", - q("SELECT * FROM system.remote_data_paths FORMAT Vertical"), - ">>> tiny_local_cache_local_disk <<<", - q( - "SELECT * FROM system.disks WHERE name = 'tiny_local_cache_local_disk' FORMAT Vertical" - ), - ] - ) + dump_debug_info = lambda: "\n".join( + [ + ">>> filesystem_cache <<<", + q("SELECT * FROM system.filesystem_cache FORMAT Vertical"), + ">>> remote_data_paths <<<", + q("SELECT * FROM system.remote_data_paths FORMAT Vertical"), + ">>> tiny_local_cache_local_disk <<<", + q( + "SELECT * FROM system.disks WHERE name = 'tiny_local_cache_local_disk' FORMAT Vertical" + ), + ] + ) + + q("SYSTEM DROP FILESYSTEM CACHE") + q("DROP TABLE IF EXISTS t1 SYNC") assert get_cache_size() == 0, dump_debug_info() assert get_free_space() > 8 * MB, dump_debug_info() @@ -124,4 +126,4 @@ def test_cache_evicted_by_temporary_data(start_cluster): str(exc.value), "*Failed to reserve * for temporary file*" ), exc.value - q("DROP TABLE IF EXISTS t1") + q("DROP TABLE IF EXISTS t1 SYNC") From 092c837119a9be11cfcc85b4696e9a9c74d9bbc8 Mon Sep 17 00:00:00 2001 From: Michael Stetsyuk Date: Fri, 2 Aug 2024 12:13:26 +0100 Subject: [PATCH 600/661] randomize table name in test_storage_delta --- tests/integration/test_storage_delta/test.py | 53 +++++++------------- 1 file changed, 17 insertions(+), 36 deletions(-) diff --git a/tests/integration/test_storage_delta/test.py b/tests/integration/test_storage_delta/test.py index 92a870ab360..054b79ff6fe 100644 --- a/tests/integration/test_storage_delta/test.py +++ b/tests/integration/test_storage_delta/test.py @@ -8,7 +8,8 @@ import os import json import time import glob -import shutil +import random +import string import pyspark import delta @@ -53,6 +54,11 @@ def get_spark(): return builder.master("local").getOrCreate() +def randomize_table_name(table_name, random_suffix_length=10): + letters = string.ascii_letters + string.digits + return f"{table_name}{''.join(random.choice(letters) for _ in range(random_suffix_length))}" + + @pytest.fixture(scope="module") def started_cluster(): try: @@ -152,7 +158,7 @@ def test_single_log_file(started_cluster): spark = started_cluster.spark_session minio_client = started_cluster.minio_client bucket = started_cluster.minio_bucket - TABLE_NAME = "test_single_log_file" + TABLE_NAME = randomize_table_name("test_single_log_file") inserted_data = "SELECT number as a, toString(number + 1) as b FROM numbers(100)" parquet_data_path = create_initial_data_file( @@ -170,16 +176,13 @@ def test_single_log_file(started_cluster): inserted_data ) - os.unlink(parquet_data_path) - shutil.rmtree(f"/{TABLE_NAME}") - def test_partition_by(started_cluster): instance = started_cluster.instances["node1"] spark = started_cluster.spark_session minio_client = started_cluster.minio_client bucket = started_cluster.minio_bucket - TABLE_NAME = "test_partition_by" + TABLE_NAME = randomize_table_name("test_partition_by") write_delta_from_df( spark, @@ -195,15 +198,13 @@ def test_partition_by(started_cluster): create_delta_table(instance, TABLE_NAME) assert int(instance.query(f"SELECT count() FROM {TABLE_NAME}")) == 
10 - shutil.rmtree(f"/{TABLE_NAME}") - def test_checkpoint(started_cluster): instance = started_cluster.instances["node1"] spark = started_cluster.spark_session minio_client = started_cluster.minio_client bucket = started_cluster.minio_bucket - TABLE_NAME = "test_checkpoint" + TABLE_NAME = randomize_table_name("test_checkpoint") write_delta_from_df( spark, @@ -272,16 +273,13 @@ def test_checkpoint(started_cluster): ).strip() ) - shutil.rmtree(f"/{TABLE_NAME}") - spark.sql(f"DROP TABLE {TABLE_NAME}") - def test_multiple_log_files(started_cluster): instance = started_cluster.instances["node1"] spark = started_cluster.spark_session minio_client = started_cluster.minio_client bucket = started_cluster.minio_bucket - TABLE_NAME = "test_multiple_log_files" + TABLE_NAME = randomize_table_name("test_multiple_log_files") write_delta_from_df( spark, generate_data(spark, 0, 100), f"/{TABLE_NAME}", mode="overwrite" @@ -313,15 +311,13 @@ def test_multiple_log_files(started_cluster): "SELECT number, toString(number + 1) FROM numbers(200)" ) - shutil.rmtree(f"/{TABLE_NAME}") - def test_metadata(started_cluster): instance = started_cluster.instances["node1"] spark = started_cluster.spark_session minio_client = started_cluster.minio_client bucket = started_cluster.minio_bucket - TABLE_NAME = "test_metadata" + TABLE_NAME = randomize_table_name("test_metadata") parquet_data_path = create_initial_data_file( started_cluster, @@ -348,14 +344,11 @@ def test_metadata(started_cluster): create_delta_table(instance, TABLE_NAME) assert int(instance.query(f"SELECT count() FROM {TABLE_NAME}")) == 100 - os.unlink(parquet_data_path) - shutil.rmtree(f"/{TABLE_NAME}") - def test_types(started_cluster): - TABLE_NAME = "test_types" + TABLE_NAME = randomize_table_name("test_types") spark = started_cluster.spark_session - result_file = f"{TABLE_NAME}_result_2" + result_file = randomize_table_name(f"{TABLE_NAME}_result_2") delta_table = ( DeltaTable.create(spark) @@ -423,16 +416,13 @@ def test_types(started_cluster): ] ) - shutil.rmtree(f"/{result_file}") - spark.sql(f"DROP TABLE {TABLE_NAME}") - def test_restart_broken(started_cluster): instance = started_cluster.instances["node1"] spark = started_cluster.spark_session minio_client = started_cluster.minio_client bucket = "broken" - TABLE_NAME = "test_restart_broken" + TABLE_NAME = randomize_table_name("test_restart_broken") if not minio_client.bucket_exists(bucket): minio_client.make_bucket(bucket) @@ -487,16 +477,13 @@ def test_restart_broken(started_cluster): assert int(instance.query(f"SELECT count() FROM {TABLE_NAME}")) == 100 - os.unlink(parquet_data_path) - shutil.rmtree(f"/{TABLE_NAME}") - def test_restart_broken_table_function(started_cluster): instance = started_cluster.instances["node1"] spark = started_cluster.spark_session minio_client = started_cluster.minio_client bucket = "broken2" - TABLE_NAME = "test_restart_broken_table_function" + TABLE_NAME = randomize_table_name("test_restart_broken_table_function") if not minio_client.bucket_exists(bucket): minio_client.make_bucket(bucket) @@ -544,16 +531,13 @@ def test_restart_broken_table_function(started_cluster): assert int(instance.query(f"SELECT count() FROM {TABLE_NAME}")) == 100 - os.unlink(parquet_data_path) - shutil.rmtree(f"/{TABLE_NAME}") - def test_partition_columns(started_cluster): instance = started_cluster.instances["node1"] spark = started_cluster.spark_session minio_client = started_cluster.minio_client bucket = started_cluster.minio_bucket - TABLE_NAME = "test_partition_columns" + TABLE_NAME = 
randomize_table_name("test_partition_columns") result_file = f"{TABLE_NAME}" partition_columns = ["b", "c", "d", "e"] @@ -744,6 +728,3 @@ SELECT * FROM deltaLake('http://{started_cluster.minio_ip}:{started_cluster.mini ) == 1 ) - - shutil.rmtree(f"/{TABLE_NAME}") - spark.sql(f"DROP TABLE {TABLE_NAME}") From 7d1e958097e716f3ea1e0b7e51d6dfa575229c4c Mon Sep 17 00:00:00 2001 From: Nikita Fomichev Date: Fri, 2 Aug 2024 13:32:59 +0200 Subject: [PATCH 601/661] Integration tests: fix ports clashing problem --- tests/integration/conftest.py | 44 ++++++++++++++++++ tests/integration/helpers/cluster.py | 67 ++++++++++++++++++++++++---- 2 files changed, 103 insertions(+), 8 deletions(-) diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index f4be31cc532..0a47840ede3 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -2,6 +2,8 @@ import logging import os +import socket +import multiprocessing import pytest # pylint:disable=import-error; for style check from helpers.cluster import run_and_check @@ -11,6 +13,7 @@ from helpers.network import _NetworkManager # # [1]: https://github.com/pytest-dev/pytest/issues/5502 logging.raiseExceptions = False +PORTS_PER_WORKER = 50 @pytest.fixture(scope="session", autouse=True) @@ -111,5 +114,46 @@ def pytest_addoption(parser): ) +def get_n_free_ports(total): + ports = [] + + while len(ports) < total: + with socket.socket() as s: + s.bind(("", 0)) + ports.append(s.getsockname()[1]) + + return ports + + def pytest_configure(config): os.environ["INTEGRATION_TESTS_RUN_ID"] = config.option.run_id + + # When running tests without pytest-xdist, + # the `pytest_xdist_setupnodes` hook is not executed + worker_ports = os.getenv("WORKER_FREE_PORTS", None) + if worker_ports is None: + os.environ["WORKER_FREE_PORTS"] = " ".join( + ([str(p) for p in get_n_free_ports(PORTS_PER_WORKER)]) + ) + + +def pytest_xdist_setupnodes(config, specs): + # Find {PORTS_PER_WORKER} * {number of xdist workers} ports and + # allocate pool of {PORTS_PER_WORKER} ports to each worker + + # Get number of xdist workers + num_workers = 1 + if os.environ.get("PYTEST_XDIST_WORKER", "master") == "master": + num_workers = config.getoption("numprocesses", 1) + if num_workers == "auto": + num_workers = multiprocessing.cpu_count() + + # Get free ports which will be distributed across workers + ports = get_n_free_ports(num_workers * PORTS_PER_WORKER) + + # Iterate over specs of workers and add allocated ports to env variable + for i, spec in enumerate(specs): + start_range = i * PORTS_PER_WORKER + spec.env["WORKER_FREE_PORTS"] = " ".join( + ([str(p) for p in ports[start_range : start_range + PORTS_PER_WORKER]]) + ) diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index 6bc0ece63ca..3480a3089fe 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -135,6 +135,52 @@ def get_free_port(): return s.getsockname()[1] +def is_port_free(port: int) -> bool: + try: + with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: + s.bind(("", port)) + return True + except socket.error: + return False + + +class PortPoolManager: + """ + This class is used for distribution of ports allocated to single pytest-xdist worker + It can be used by multiple ClickHouseCluster instances + """ + + # Shared between instances + all_ports = None + free_ports = None + + def __init__(self): + self.used_ports = [] + + if self.all_ports is None: + worker_ports = os.getenv("WORKER_FREE_PORTS") + ports = [int(p) 
for p in worker_ports.split(" ")] + + # Static vars + PortPoolManager.all_ports = ports + PortPoolManager.free_ports = ports + + def get_port(self): + for port in self.free_ports: + if is_port_free(port): + self.free_ports.remove(port) + self.used_ports.append(port) + return port + + raise Exception( + f"No free ports: {self.all_ports}", + ) + + def return_used_ports(self): + self.free_ports.extend(self.used_ports) + self.used_ports.clear() + + def retry_exception(num, delay, func, exception=Exception, *args, **kwargs): """ Retry if `func()` throws, `num` times. @@ -716,62 +762,67 @@ class ClickHouseCluster: .stop() ) + self.port_pool = PortPoolManager() + @property def kafka_port(self): if self._kafka_port: return self._kafka_port - self._kafka_port = get_free_port() + self._kafka_port = self.port_pool.get_port() return self._kafka_port @property def schema_registry_port(self): if self._schema_registry_port: return self._schema_registry_port - self._schema_registry_port = get_free_port() + self._schema_registry_port = self.port_pool.get_port() return self._schema_registry_port @property def schema_registry_auth_port(self): if self._schema_registry_auth_port: return self._schema_registry_auth_port - self._schema_registry_auth_port = get_free_port() + self._schema_registry_auth_port = self.port_pool.get_port() return self._schema_registry_auth_port @property def kerberized_kafka_port(self): if self._kerberized_kafka_port: return self._kerberized_kafka_port - self._kerberized_kafka_port = get_free_port() + self._kerberized_kafka_port = self.port_pool.get_port() return self._kerberized_kafka_port @property def azurite_port(self): if self._azurite_port: return self._azurite_port - self._azurite_port = get_free_port() + self._azurite_port = self.port_pool.get_port() return self._azurite_port @property def mongo_port(self): if self._mongo_port: return self._mongo_port - self._mongo_port = get_free_port() + self._mongo_port = self.port_pool.get_port() return self._mongo_port @property def mongo_no_cred_port(self): if self._mongo_no_cred_port: return self._mongo_no_cred_port - self._mongo_no_cred_port = get_free_port() + self._mongo_no_cred_port = self.port_pool.get_port() return self._mongo_no_cred_port @property def redis_port(self): if self._redis_port: return self._redis_port - self._redis_port = get_free_port() + self._redis_port = self.port_pool.get_port() return self._redis_port + def __exit__(self, exc_type, exc_val, exc_tb): + self.port_pool.return_used_ports() + def print_all_docker_pieces(self): res_networks = subprocess.check_output( f"docker network ls --filter name='{self.project_name}*'", From 7d45529fe8d28a6b39deb32d060343bb5d03b64f Mon Sep 17 00:00:00 2001 From: Michael Stetsyuk Date: Fri, 2 Aug 2024 12:35:40 +0100 Subject: [PATCH 602/661] randomize query id in test_checking_s3_blobs_paranoid --- .../test_checking_s3_blobs_paranoid/test.py | 31 ++++++++++++------- 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/tests/integration/test_checking_s3_blobs_paranoid/test.py b/tests/integration/test_checking_s3_blobs_paranoid/test.py index afe8449b44a..c22142046c7 100644 --- a/tests/integration/test_checking_s3_blobs_paranoid/test.py +++ b/tests/integration/test_checking_s3_blobs_paranoid/test.py @@ -4,6 +4,8 @@ import logging import pytest import os import minio +import random +import string from helpers.cluster import ClickHouseCluster from helpers.mock_servers import start_s3_mock @@ -45,6 +47,11 @@ def cluster(): cluster.shutdown() +def randomize_query_id(query_id, 
random_suffix_length=10): + letters = string.ascii_letters + string.digits + return f"{query_id}_{''.join(random.choice(letters) for _ in range(random_suffix_length))}" + + @pytest.fixture(scope="module") def init_broken_s3(cluster): yield start_s3_mock(cluster, "broken_s3", "8083") @@ -128,7 +135,7 @@ def test_upload_s3_fail_create_multi_part_upload(cluster, broken_s3, compression broken_s3.setup_at_create_multi_part_upload() - insert_query_id = f"INSERT_INTO_TABLE_FUNCTION_FAIL_CREATE_MPU_{compression}" + insert_query_id = randomize_query_id(f"INSERT_INTO_TABLE_FUNCTION_FAIL_CREATE_MPU_{compression}") error = node.query_and_get_error( f""" INSERT INTO @@ -170,7 +177,7 @@ def test_upload_s3_fail_upload_part_when_multi_part_upload( broken_s3.setup_fake_multpartuploads() broken_s3.setup_at_part_upload(count=1, after=2) - insert_query_id = f"INSERT_INTO_TABLE_FUNCTION_FAIL_UPLOAD_PART_{compression}" + insert_query_id = randomize_query_id(f"INSERT_INTO_TABLE_FUNCTION_FAIL_UPLOAD_PART_{compression}") error = node.query_and_get_error( f""" INSERT INTO @@ -222,7 +229,7 @@ def test_when_error_is_retried(cluster, broken_s3, action_and_message): broken_s3.setup_fake_multpartuploads() broken_s3.setup_at_part_upload(count=3, after=2, action=action) - insert_query_id = f"INSERT_INTO_TABLE_{action}_RETRIED" + insert_query_id = randomize_query_id(f"INSERT_INTO_TABLE_{action}_RETRIED") node.query( f""" INSERT INTO @@ -251,7 +258,7 @@ def test_when_error_is_retried(cluster, broken_s3, action_and_message): assert s3_errors == 3 broken_s3.setup_at_part_upload(count=1000, after=2, action=action) - insert_query_id = f"INSERT_INTO_TABLE_{action}_RETRIED_1" + insert_query_id = randomize_query_id(f"INSERT_INTO_TABLE_{action}_RETRIED_1") error = node.query_and_get_error( f""" INSERT INTO @@ -286,7 +293,7 @@ def test_when_s3_broken_pipe_at_upload_is_retried(cluster, broken_s3): action="broken_pipe", ) - insert_query_id = f"TEST_WHEN_S3_BROKEN_PIPE_AT_UPLOAD" + insert_query_id = randomize_query_id(f"TEST_WHEN_S3_BROKEN_PIPE_AT_UPLOAD") node.query( f""" INSERT INTO @@ -320,7 +327,7 @@ def test_when_s3_broken_pipe_at_upload_is_retried(cluster, broken_s3): after=2, action="broken_pipe", ) - insert_query_id = f"TEST_WHEN_S3_BROKEN_PIPE_AT_UPLOAD_1" + insert_query_id = randomize_query_id(f"TEST_WHEN_S3_BROKEN_PIPE_AT_UPLOAD_1") error = node.query_and_get_error( f""" INSERT INTO @@ -362,7 +369,7 @@ def test_when_s3_connection_reset_by_peer_at_upload_is_retried( action_args=["1"] if send_something else ["0"], ) - insert_query_id = ( + insert_query_id = randomize_query_id( f"TEST_WHEN_S3_CONNECTION_RESET_BY_PEER_AT_UPLOAD_{send_something}" ) node.query( @@ -399,7 +406,7 @@ def test_when_s3_connection_reset_by_peer_at_upload_is_retried( action="connection_reset_by_peer", action_args=["1"] if send_something else ["0"], ) - insert_query_id = ( + insert_query_id = randomize_query_id( f"TEST_WHEN_S3_CONNECTION_RESET_BY_PEER_AT_UPLOAD_{send_something}_1" ) error = node.query_and_get_error( @@ -444,7 +451,7 @@ def test_when_s3_connection_reset_by_peer_at_create_mpu_retried( action_args=["1"] if send_something else ["0"], ) - insert_query_id = ( + insert_query_id = randomize_query_id( f"TEST_WHEN_S3_CONNECTION_RESET_BY_PEER_AT_MULTIPARTUPLOAD_{send_something}" ) node.query( @@ -482,7 +489,7 @@ def test_when_s3_connection_reset_by_peer_at_create_mpu_retried( action_args=["1"] if send_something else ["0"], ) - insert_query_id = ( + insert_query_id = randomize_query_id( 
f"TEST_WHEN_S3_CONNECTION_RESET_BY_PEER_AT_MULTIPARTUPLOAD_{send_something}_1" ) error = node.query_and_get_error( @@ -522,7 +529,7 @@ def test_query_is_canceled_with_inf_retries(cluster, broken_s3): action="connection_refused", ) - insert_query_id = f"TEST_QUERY_IS_CANCELED_WITH_INF_RETRIES" + insert_query_id = randomize_query_id(f"TEST_QUERY_IS_CANCELED_WITH_INF_RETRIES") request = node.get_query_request( f""" INSERT INTO @@ -580,7 +587,7 @@ def test_adaptive_timeouts(cluster, broken_s3, node_name): count=1000000, ) - insert_query_id = f"TEST_ADAPTIVE_TIMEOUTS_{node_name}" + insert_query_id = randomize_query_id(f"TEST_ADAPTIVE_TIMEOUTS_{node_name}") node.query( f""" INSERT INTO From 34cba1bdda55cdd2409c535be56e4fe6165c894a Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Fri, 2 Aug 2024 11:46:03 +0000 Subject: [PATCH 603/661] Automatic style fix --- tests/integration/test_checking_s3_blobs_paranoid/test.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tests/integration/test_checking_s3_blobs_paranoid/test.py b/tests/integration/test_checking_s3_blobs_paranoid/test.py index c22142046c7..b995b4d6461 100644 --- a/tests/integration/test_checking_s3_blobs_paranoid/test.py +++ b/tests/integration/test_checking_s3_blobs_paranoid/test.py @@ -135,7 +135,9 @@ def test_upload_s3_fail_create_multi_part_upload(cluster, broken_s3, compression broken_s3.setup_at_create_multi_part_upload() - insert_query_id = randomize_query_id(f"INSERT_INTO_TABLE_FUNCTION_FAIL_CREATE_MPU_{compression}") + insert_query_id = randomize_query_id( + f"INSERT_INTO_TABLE_FUNCTION_FAIL_CREATE_MPU_{compression}" + ) error = node.query_and_get_error( f""" INSERT INTO @@ -177,7 +179,9 @@ def test_upload_s3_fail_upload_part_when_multi_part_upload( broken_s3.setup_fake_multpartuploads() broken_s3.setup_at_part_upload(count=1, after=2) - insert_query_id = randomize_query_id(f"INSERT_INTO_TABLE_FUNCTION_FAIL_UPLOAD_PART_{compression}") + insert_query_id = randomize_query_id( + f"INSERT_INTO_TABLE_FUNCTION_FAIL_UPLOAD_PART_{compression}" + ) error = node.query_and_get_error( f""" INSERT INTO From d2d8a16ca6c5c3df31a62894fe2bcfb26d570061 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Fri, 2 Aug 2024 13:51:03 +0200 Subject: [PATCH 604/661] Fix refreshable MVs --- src/Databases/DatabaseOrdinary.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/Databases/DatabaseOrdinary.cpp b/src/Databases/DatabaseOrdinary.cpp index 3ab5d3fa697..8808261654f 100644 --- a/src/Databases/DatabaseOrdinary.cpp +++ b/src/Databases/DatabaseOrdinary.cpp @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -250,6 +251,8 @@ void DatabaseOrdinary::loadTablesMetadata(ContextPtr local_context, ParsedTables convertMergeTreeToReplicatedIfNeeded(ast, qualified_name, file_name); + NormalizeSelectWithUnionQueryVisitor::Data data{local_context->getSettingsRef().union_default_mode}; + NormalizeSelectWithUnionQueryVisitor{data}.visit(ast); std::lock_guard lock{metadata.mutex}; metadata.parsed_tables[qualified_name] = ParsedTableMetadata{full_path.string(), ast}; metadata.total_dictionaries += create_query->is_dictionary; From a64e625f0ba20751f929222bd49aaf1295ff15ed Mon Sep 17 00:00:00 2001 From: Nikita Fomichev Date: Fri, 2 Aug 2024 14:03:27 +0200 Subject: [PATCH 605/661] Integration tests: fix ports clashing problem 2 --- tests/integration/conftest.py | 36 ++++++++++++++--------------------- 1 file changed, 14 insertions(+), 22 deletions(-) diff --git a/tests/integration/conftest.py 
b/tests/integration/conftest.py index 0a47840ede3..a386ed53009 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -2,11 +2,9 @@ import logging import os -import socket -import multiprocessing import pytest # pylint:disable=import-error; for style check -from helpers.cluster import run_and_check +from helpers.cluster import run_and_check, is_port_free from helpers.network import _NetworkManager # This is a workaround for a problem with logging in pytest [1]. @@ -114,15 +112,16 @@ def pytest_addoption(parser): ) -def get_n_free_ports(total): +def get_unique_free_ports(total): ports = [] + for port in range(30000, 55000): + if is_port_free(port) and port not in ports: + ports.append(port) - while len(ports) < total: - with socket.socket() as s: - s.bind(("", 0)) - ports.append(s.getsockname()[1]) + if len(ports) == total: + return ports - return ports + raise Exception(f"Can't collect {total} ports. Collected: {len(ports)}") def pytest_configure(config): @@ -132,9 +131,8 @@ def pytest_configure(config): # the `pytest_xdist_setupnodes` hook is not executed worker_ports = os.getenv("WORKER_FREE_PORTS", None) if worker_ports is None: - os.environ["WORKER_FREE_PORTS"] = " ".join( - ([str(p) for p in get_n_free_ports(PORTS_PER_WORKER)]) - ) + master_ports = get_unique_free_ports(PORTS_PER_WORKER) + os.environ["WORKER_FREE_PORTS"] = " ".join(([str(p) for p in master_ports])) def pytest_xdist_setupnodes(config, specs): @@ -142,18 +140,12 @@ def pytest_xdist_setupnodes(config, specs): # allocate pool of {PORTS_PER_WORKER} ports to each worker # Get number of xdist workers - num_workers = 1 - if os.environ.get("PYTEST_XDIST_WORKER", "master") == "master": - num_workers = config.getoption("numprocesses", 1) - if num_workers == "auto": - num_workers = multiprocessing.cpu_count() - + num_workers = len(specs) # Get free ports which will be distributed across workers - ports = get_n_free_ports(num_workers * PORTS_PER_WORKER) + ports = get_unique_free_ports(num_workers * PORTS_PER_WORKER) # Iterate over specs of workers and add allocated ports to env variable for i, spec in enumerate(specs): start_range = i * PORTS_PER_WORKER - spec.env["WORKER_FREE_PORTS"] = " ".join( - ([str(p) for p in ports[start_range : start_range + PORTS_PER_WORKER]]) - ) + per_workrer_ports = ports[start_range : start_range + PORTS_PER_WORKER] + spec.env["WORKER_FREE_PORTS"] = " ".join(([str(p) for p in per_workrer_ports])) From f55784c636c1dcb503dadb2f75bd6b586271bf0d Mon Sep 17 00:00:00 2001 From: vdimir Date: Fri, 2 Aug 2024 12:07:05 +0000 Subject: [PATCH 606/661] Fix 03203_client_benchmark_options --- tests/queries/0_stateless/03203_client_benchmark_options.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/03203_client_benchmark_options.sh b/tests/queries/0_stateless/03203_client_benchmark_options.sh index 37a1f2cd3ac..967db056c0b 100755 --- a/tests/queries/0_stateless/03203_client_benchmark_options.sh +++ b/tests/queries/0_stateless/03203_client_benchmark_options.sh @@ -5,10 +5,10 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . "$CUR_DIR"/../shell_config.sh output=$(${CLICKHOUSE_CLIENT} -t -q "SELECT sleepEachRow(2) FORMAT Null" 2>&1) -{ echo "$output" | grep -q "^2\." 
&& echo "Ok"; } || { echo "Fail"; echo "'$output'"; } +{ number=$(echo "$output" | grep -o "^[0-9]"); [[ -n "$number" && "$number" -ge 2 ]] && echo "Ok"; } || { echo "Fail"; echo "'$output'"; } output=$(${CLICKHOUSE_CLIENT} --time -q "SELECT sleepEachRow(2) FORMAT Null" 2>&1) -{ echo "$output" | grep -q "^2\." && echo "Ok"; } || { echo "Fail"; echo "'$output'"; } +{ number=$(echo "$output" | grep -o "^[0-9]"); [[ -n "$number" && "$number" -ge 2 ]] && echo "Ok"; } || { echo "Fail"; echo "'$output'"; } output=$(${CLICKHOUSE_CLIENT} --memory-usage -q "SELECT sum(number) FROM numbers(10_000) FORMAT Null" 2>&1) { echo "$output" | grep -q "^[0-9]\+$" && echo "Ok"; } || { echo "Fail"; echo "'$output'"; } From 5256e8e6d08b5076be49b56fdebadb668892771c Mon Sep 17 00:00:00 2001 From: Nikita Fomichev Date: Fri, 2 Aug 2024 14:36:33 +0200 Subject: [PATCH 607/661] Integration tests: fix ports clashing problem 3 --- tests/integration/conftest.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index a386ed53009..b4c86a1cd2f 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -132,7 +132,7 @@ def pytest_configure(config): worker_ports = os.getenv("WORKER_FREE_PORTS", None) if worker_ports is None: master_ports = get_unique_free_ports(PORTS_PER_WORKER) - os.environ["WORKER_FREE_PORTS"] = " ".join(([str(p) for p in master_ports])) + os.environ["WORKER_FREE_PORTS"] = " ".join([str(p) for p in master_ports]) def pytest_xdist_setupnodes(config, specs): @@ -148,4 +148,4 @@ def pytest_xdist_setupnodes(config, specs): for i, spec in enumerate(specs): start_range = i * PORTS_PER_WORKER per_workrer_ports = ports[start_range : start_range + PORTS_PER_WORKER] - spec.env["WORKER_FREE_PORTS"] = " ".join(([str(p) for p in per_workrer_ports])) + spec.env["WORKER_FREE_PORTS"] = " ".join([str(p) for p in per_workrer_ports]) From db0bce33526abf16e705b9e56d178d6e2c45a36b Mon Sep 17 00:00:00 2001 From: kssenii Date: Fri, 2 Aug 2024 15:01:15 +0200 Subject: [PATCH 608/661] Try make the code more understandable --- src/Interpreters/Cache/FileCache.cpp | 164 ++++++++++++------------- src/Interpreters/Cache/FileCache.h | 12 +- tests/config/config.d/storage_conf.xml | 2 +- 3 files changed, 80 insertions(+), 98 deletions(-) diff --git a/src/Interpreters/Cache/FileCache.cpp b/src/Interpreters/Cache/FileCache.cpp index 0a03f5dcc7d..4c17afb79be 100644 --- a/src/Interpreters/Cache/FileCache.cpp +++ b/src/Interpreters/Cache/FileCache.cpp @@ -318,7 +318,29 @@ FileSegments FileCache::getImpl(const LockedKey & locked_key, const FileSegment: std::vector FileCache::splitRange(size_t offset, size_t size, size_t aligned_size) { - assert(size > 0); + chassert(size > 0); + chassert(size <= aligned_size); + + /// Consider this example to understand why we need to account here for both `size` and `aligned_size`. + /// [________________]__________________] <-- requested range + /// ^ ^ + /// right offset aligned_right_offset + /// [_________] <-- last cached file segment, e.g. we have uncovered suffix of the requested range + /// [________________] + /// size + /// [____________________________________] + /// aligned_size + /// + /// So it is possible that we split this hole range into sub-segments by `max_file_segment_size` + /// and get something like this: + /// + /// [________________________] + /// ^ ^ + /// right_offset right_offset + max_file_segment_size + /// e.g. 
there is no need to create sub-segment for range (right_offset + max_file_segment_size, aligned_right_offset]. + /// Because its left offset would be bigger than right_offset. + /// Therefore, we set end_pos_non_included as offset+size, but remaining_size as aligned_size. + std::vector ranges; size_t current_pos = offset; @@ -339,42 +361,23 @@ std::vector FileCache::splitRange(size_t offset, size_t size return ranges; } -FileSegments FileCache::splitRangeIntoFileSegments( +FileSegments FileCache::createFileSegmentsFromRanges( LockedKey & locked_key, - size_t offset, - size_t size, - size_t aligned_size, - FileSegment::State state, + const std::vector & ranges, + size_t & file_segments_count, size_t file_segments_limit, const CreateFileSegmentSettings & create_settings) { - chassert(size > 0); - chassert(size <= aligned_size); - /// We take `size` as a soft limit and `aligned_size` as a hard limit. - - auto current_pos = offset; - auto end_pos_non_included = offset + size; - - size_t current_file_segment_size; - size_t remaining_size = aligned_size; - - FileSegments file_segments; - const size_t max_size = max_file_segment_size.load(); - while (current_pos < end_pos_non_included && (!file_segments_limit || file_segments.size() < file_segments_limit)) + FileSegments result; + for (const auto & r : ranges) { - current_file_segment_size = std::min(remaining_size, max_size); - remaining_size -= current_file_segment_size; - - auto file_segment_metadata_it = addFileSegment( - locked_key, current_pos, current_file_segment_size, state, create_settings, nullptr); - file_segments.push_back(file_segment_metadata_it->second->file_segment); - - current_pos += current_file_segment_size; + if (file_segments_limit && file_segments_count >= file_segments_limit) + break; + auto metadata_it = addFileSegment(locked_key, r.left, r.size(), FileSegment::State::EMPTY, create_settings, nullptr); + result.push_back(metadata_it->second->file_segment); + ++file_segments_count; } - - chassert(file_segments.size() == file_segments_limit || file_segments.back()->range().contains(offset + size - 1), - fmt::format("Offset: {}, size: {}, file segments: {}", offset, size, toString(file_segments))); - return file_segments; + return result; } void FileCache::fillHolesWithEmptyFileSegments( @@ -448,18 +451,9 @@ void FileCache::fillHolesWithEmptyFileSegments( } else { - auto ranges = splitRange(current_pos, hole_size, hole_size); - FileSegments hole; - for (const auto & r : ranges) - { - auto metadata_it = addFileSegment(locked_key, r.left, r.size(), FileSegment::State::EMPTY, create_settings, nullptr); - hole.push_back(metadata_it->second->file_segment); - ++processed_count; - - if (is_limit_reached()) - break; - } - file_segments.splice(it, std::move(hole)); + const auto ranges = splitRange(current_pos, hole_size, hole_size); + auto hole_segments = createFileSegmentsFromRanges(locked_key, ranges, processed_count, file_segments_limit, create_settings); + file_segments.splice(it, std::move(hole_segments)); } if (is_limit_reached()) @@ -493,29 +487,20 @@ void FileCache::fillHolesWithEmptyFileSegments( /// segmentN auto hole_size = range.right - current_pos + 1; - auto non_aligned_size = non_aligned_right_offset - current_pos + 1; + auto non_aligned_hole_size = non_aligned_right_offset - current_pos + 1; if (fill_with_detached_file_segments) { auto file_segment = std::make_shared( - locked_key.getKey(), current_pos, hole_size, FileSegment::State::DETACHED, create_settings); + locked_key.getKey(), current_pos, 
non_aligned_hole_size, FileSegment::State::DETACHED, create_settings); file_segments.insert(file_segments.end(), file_segment); } else { - auto ranges = splitRange(current_pos, non_aligned_size, hole_size); - FileSegments hole; - for (const auto & r : ranges) - { - auto metadata_it = addFileSegment(locked_key, r.left, r.size(), FileSegment::State::EMPTY, create_settings, nullptr); - hole.push_back(metadata_it->second->file_segment); - ++processed_count; - - if (is_limit_reached()) - break; - } - file_segments.splice(it, std::move(hole)); + const auto ranges = splitRange(current_pos, non_aligned_hole_size, hole_size); + auto hole_segments = createFileSegmentsFromRanges(locked_key, ranges, processed_count, file_segments_limit, create_settings); + file_segments.splice(it, std::move(hole_segments)); if (is_limit_reached()) erase_unprocessed(); @@ -548,8 +533,9 @@ FileSegmentsHolderPtr FileCache::set( } else { - file_segments = splitRangeIntoFileSegments( - *locked_key, offset, size, size, FileSegment::State::EMPTY, /* file_segments_limit */0, create_settings); + const auto ranges = splitRange(offset, size, size); + size_t file_segments_count = 0; + file_segments = createFileSegmentsFromRanges(*locked_key, ranges, file_segments_count, /* file_segments_limit */0, create_settings); } return std::make_unique(std::move(file_segments)); @@ -569,23 +555,27 @@ FileCache::getOrSet( assertInitialized(); - FileSegment::Range range(offset, offset + size - 1); + FileSegment::Range initial_range(offset, offset + size - 1); + /// result_range is initial range, which will be adjusted according to + /// 1. aligned offset, alighed_end_offset + /// 2. max_file_segments_limit + FileSegment::Range result_range = initial_range; - const auto aligned_offset = roundDownToMultiple(range.left, boundary_alignment); - auto aligned_end_offset = std::min(roundUpToMultiple(offset + size, boundary_alignment), file_size) - 1; + const auto aligned_offset = roundDownToMultiple(initial_range.left, boundary_alignment); + auto aligned_end_offset = std::min(roundUpToMultiple(initial_range.right + 1, boundary_alignment), file_size) - 1; - chassert(aligned_offset <= range.left); - chassert(aligned_end_offset >= range.right); + chassert(aligned_offset <= initial_range.left); + chassert(aligned_end_offset >= initial_range.right); auto locked_key = metadata.lockKeyMetadata(key, CacheMetadata::KeyNotFoundPolicy::CREATE_EMPTY, user); /// Get all segments which intersect with the given range. - auto file_segments = getImpl(*locked_key, range, file_segments_limit); + auto file_segments = getImpl(*locked_key, initial_range, file_segments_limit); if (file_segments_limit) { chassert(file_segments.size() <= file_segments_limit); if (file_segments.size() == file_segments_limit) - range.right = aligned_end_offset = file_segments.back()->range().right; + result_range.right = aligned_end_offset = file_segments.back()->range().right; } /// Check case if we have uncovered prefix, e.g. @@ -597,11 +587,11 @@ FileCache::getOrSet( /// [ ] /// ^----^ /// uncovered prefix. - const bool has_uncovered_prefix = file_segments.empty() || range.left < file_segments.front()->range().left; + const bool has_uncovered_prefix = file_segments.empty() || result_range.left < file_segments.front()->range().left; - if (aligned_offset < range.left && has_uncovered_prefix) + if (aligned_offset < result_range.left && has_uncovered_prefix) { - auto prefix_range = FileSegment::Range(aligned_offset, file_segments.empty() ? 
range.left - 1 : file_segments.front()->range().left - 1); + auto prefix_range = FileSegment::Range(aligned_offset, file_segments.empty() ? result_range.left - 1 : file_segments.front()->range().left - 1); auto prefix_file_segments = getImpl(*locked_key, prefix_range, /* file_segments_limit */0); if (prefix_file_segments.empty()) @@ -610,7 +600,7 @@ FileCache::getOrSet( /// ^ ^ ^ /// aligned_offset range.left range.right /// [___] [__________] <-- current cache (example) - range.left = aligned_offset; + result_range.left = aligned_offset; } else { @@ -621,10 +611,10 @@ FileCache::getOrSet( /// ^ /// prefix_file_segments.back().right - chassert(prefix_file_segments.back()->range().right < range.left); + chassert(prefix_file_segments.back()->range().right < result_range.left); chassert(prefix_file_segments.back()->range().right >= aligned_offset); - range.left = prefix_file_segments.back()->range().right + 1; + result_range.left = prefix_file_segments.back()->range().right + 1; } } @@ -637,11 +627,11 @@ FileCache::getOrSet( /// [___] /// ^---^ /// uncovered_suffix - const bool has_uncovered_suffix = file_segments.empty() || file_segments.back()->range().right < range.right; + const bool has_uncovered_suffix = file_segments.empty() || file_segments.back()->range().right < result_range.right; - if (range.right < aligned_end_offset && has_uncovered_suffix) + if (result_range.right < aligned_end_offset && has_uncovered_suffix) { - auto suffix_range = FileSegment::Range(range.right, aligned_end_offset); + auto suffix_range = FileSegment::Range(result_range.right, aligned_end_offset); /// We need to get 1 file segment, so file_segments_limit = 1 here. auto suffix_file_segments = getImpl(*locked_key, suffix_range, /* file_segments_limit */1); @@ -652,7 +642,7 @@ FileCache::getOrSet( /// range.left range.right aligned_end_offset /// [___] [___] <-- current cache (example) - range.right = aligned_end_offset; + result_range.right = aligned_end_offset; } else { @@ -662,35 +652,33 @@ FileCache::getOrSet( /// [___] [___] [_________] <-- current cache (example) /// ^ /// suffix_file_segments.front().left - range.right = suffix_file_segments.front()->range().left - 1; + result_range.right = suffix_file_segments.front()->range().left - 1; } } - chassert(range.left >= aligned_offset); - if (file_segments.empty()) { - file_segments = splitRangeIntoFileSegments( - *locked_key, range.left, /* size */offset + size - range.left, /* aligned_size */range.size(), - FileSegment::State::EMPTY, file_segments_limit, create_settings); + auto ranges = splitRange(result_range.left, initial_range.size() + (initial_range.left - result_range.left), result_range.size()); + size_t file_segments_count = file_segments.size(); + file_segments.splice(file_segments.end(), createFileSegmentsFromRanges(*locked_key, ranges, file_segments_count, file_segments_limit, create_settings)); } else { - chassert(file_segments.front()->range().right >= range.left); - chassert(file_segments.back()->range().left <= range.right); + chassert(file_segments.front()->range().right >= result_range.left); + chassert(file_segments.back()->range().left <= result_range.right); fillHolesWithEmptyFileSegments( - *locked_key, file_segments, range, offset + size - 1, file_segments_limit, /* fill_with_detached */false, create_settings); + *locked_key, file_segments, result_range, offset + size - 1, file_segments_limit, /* fill_with_detached */false, create_settings); - if (!file_segments.front()->range().contains(range.left)) + if 
(!file_segments.front()->range().contains(result_range.left)) { throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected {} to include {} " "(end offset: {}, aligned offset: {}, aligned end offset: {})", - file_segments.front()->range().toString(), offset, range.right, aligned_offset, aligned_end_offset); + file_segments.front()->range().toString(), offset, result_range.right, aligned_offset, aligned_end_offset); } } - chassert(file_segments_limit ? file_segments.back()->range().left <= range.right : file_segments.back()->range().contains(range.right)); + chassert(file_segments_limit ? file_segments.back()->range().left <= result_range.right : file_segments.back()->range().contains(result_range.right)); chassert(!file_segments_limit || file_segments.size() <= file_segments_limit); return std::make_unique(std::move(file_segments)); diff --git a/src/Interpreters/Cache/FileCache.h b/src/Interpreters/Cache/FileCache.h index 3f7eec73b56..07be802a940 100644 --- a/src/Interpreters/Cache/FileCache.h +++ b/src/Interpreters/Cache/FileCache.h @@ -265,16 +265,10 @@ private: /// each subrange size must be less or equal to max_file_segment_size. std::vector splitRange(size_t offset, size_t size, size_t aligned_size); - /// Split range into subranges by max_file_segment_size (same as in splitRange()) - /// and create a new file segment for each subrange. - /// If `file_segments_limit` > 0, create no more than first file_segments_limit - /// file segments. - FileSegments splitRangeIntoFileSegments( + FileSegments createFileSegmentsFromRanges( LockedKey & locked_key, - size_t offset, - size_t size, - size_t aligned_size, - FileSegment::State state, + const std::vector & ranges, + size_t & file_segments_count, size_t file_segments_limit, const CreateFileSegmentSettings & create_settings); diff --git a/tests/config/config.d/storage_conf.xml b/tests/config/config.d/storage_conf.xml index 4daa64b520d..e106e3a0e6b 100644 --- a/tests/config/config.d/storage_conf.xml +++ b/tests/config/config.d/storage_conf.xml @@ -19,7 +19,7 @@ cache s3_disk s3_cache/ - 100Mi + 104857600 5Mi 1 100 From 8d979680060f10e6bcec3fc83fb3bdbaa7bb3deb Mon Sep 17 00:00:00 2001 From: Lennard Eijsackers Date: Fri, 2 Aug 2024 15:13:37 +0200 Subject: [PATCH 609/661] Use FunctionArgumentDescriptors to check bitSlice function + add test case Signed-off-by: Lennard Eijsackers --- src/Functions/bitSlice.cpp | 30 ++++++++----------- ...214_bitslice_argument_evaluation.reference | 0 .../03214_bitslice_argument_evaluation.sql | 10 +++++++ 3 files changed, 22 insertions(+), 18 deletions(-) create mode 100644 tests/queries/0_stateless/03214_bitslice_argument_evaluation.reference create mode 100644 tests/queries/0_stateless/03214_bitslice_argument_evaluation.sql diff --git a/src/Functions/bitSlice.cpp b/src/Functions/bitSlice.cpp index e2b455846d8..f1d3bb57221 100644 --- a/src/Functions/bitSlice.cpp +++ b/src/Functions/bitSlice.cpp @@ -40,28 +40,22 @@ public: bool useDefaultImplementationForConstants() const override { return true; } - DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { - const size_t number_of_arguments = arguments.size(); + FunctionArgumentDescriptors mandatory_args{ + {"s", static_cast(&isStringOrFixedString), nullptr, "String"}, + {"offset", static_cast(&isNativeNumber), nullptr, "(U)Int8, (U)Int16, (U)Int32, (U)Int64 or Float"}, + }; - if (number_of_arguments < 2 || number_of_arguments > 3) - throw 
Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, - "Number of arguments for function {} doesn't match: passed {}, should be 2 or 3", - getName(), number_of_arguments); + FunctionArgumentDescriptors optional_args{ + {"length", static_cast(&isNativeNumber), nullptr, "(U)Int8, (U)Int16, (U)Int32, (U)Int64 or Float"}, + }; - if (!isString(arguments[0]) && !isStringOrFixedString(arguments[0])) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}", - arguments[0]->getName(), getName()); - if (arguments[0]->onlyNull()) - return arguments[0]; + validateFunctionArguments(*this, arguments, mandatory_args, optional_args); - if (!isNativeNumber(arguments[1])) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of second argument of function {}", - arguments[1]->getName(), getName()); - - if (number_of_arguments == 3 && !isNativeNumber(arguments[2])) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of second argument of function {}", - arguments[2]->getName(), getName()); + const auto & type = arguments[0].type; + if (type->onlyNull()) + return type; return std::make_shared(); } diff --git a/tests/queries/0_stateless/03214_bitslice_argument_evaluation.reference b/tests/queries/0_stateless/03214_bitslice_argument_evaluation.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03214_bitslice_argument_evaluation.sql b/tests/queries/0_stateless/03214_bitslice_argument_evaluation.sql new file mode 100644 index 00000000000..b8488600fcb --- /dev/null +++ b/tests/queries/0_stateless/03214_bitslice_argument_evaluation.sql @@ -0,0 +1,10 @@ +-- No arguments passed +SELECT bitSlice(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +-- Invalid 1st argument passed +SELECT bitSlice(1, 1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +-- Valid 1st argument, invalid 2nd argument passed +SELECT bitSlice('Hello', 'World'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +-- Valid 1st argument & 2nd argument, invalid 3rd argument passed +SELECT bitSlice('Hello', 1, 'World'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +-- More arguments then expected +SELECT bitSlice('Hello', 1, 1, 'World'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } From 83096249a7480b0bfa1d9246c17136727bba904c Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Fri, 2 Aug 2024 13:54:32 +0000 Subject: [PATCH 610/661] Update version_date.tsv and changelogs after v24.3.6.48-lts --- docs/changelogs/v24.3.6.48-lts.md | 39 ++++++++++++++++++++++++++++ utils/list-versions/version_date.tsv | 1 + 2 files changed, 40 insertions(+) create mode 100644 docs/changelogs/v24.3.6.48-lts.md diff --git a/docs/changelogs/v24.3.6.48-lts.md b/docs/changelogs/v24.3.6.48-lts.md new file mode 100644 index 00000000000..f045afc619b --- /dev/null +++ b/docs/changelogs/v24.3.6.48-lts.md @@ -0,0 +1,39 @@ +--- +sidebar_position: 1 +sidebar_label: 2024 +--- + +# 2024 Changelog + +### ClickHouse release v24.3.6.48-lts (b2d33c3c45d) FIXME as compared to v24.3.5.46-lts (fe54cead6b6) + +#### Critical Bug Fix (crash, LOGICAL_ERROR, data loss, RBAC) +* Backported in [#66889](https://github.com/ClickHouse/ClickHouse/issues/66889): Fix unexpeced size of low cardinality column in function calls. [#65298](https://github.com/ClickHouse/ClickHouse/pull/65298) ([Raúl Marín](https://github.com/Algunenano)). 
+* Backported in [#66687](https://github.com/ClickHouse/ClickHouse/issues/66687): Fix the VALID UNTIL clause in the user definition resetting after a restart. Closes [#66405](https://github.com/ClickHouse/ClickHouse/issues/66405). [#66409](https://github.com/ClickHouse/ClickHouse/pull/66409) ([Nikolay Degterinsky](https://github.com/evillique)).
+* Backported in [#67497](https://github.com/ClickHouse/ClickHouse/issues/67497): Fix crash in DistributedAsyncInsert when connection is empty. [#67219](https://github.com/ClickHouse/ClickHouse/pull/67219) ([Pablo Marcos](https://github.com/pamarcos)).
+
+#### Bug Fix (user-visible misbehavior in an official stable release)
+* Backported in [#66324](https://github.com/ClickHouse/ClickHouse/issues/66324): Add missing settings `input_format_csv_skip_first_lines/input_format_tsv_skip_first_lines/input_format_csv_try_infer_numbers_from_strings/input_format_csv_try_infer_strings_from_quoted_tuples` in schema inference cache because they can change the resulting schema. It prevents from incorrect result of schema inference with these settings changed. [#65980](https://github.com/ClickHouse/ClickHouse/pull/65980) ([Kruglov Pavel](https://github.com/Avogar)).
+* Backported in [#66151](https://github.com/ClickHouse/ClickHouse/issues/66151): Fixed buffer overflow bug in `unbin`/`unhex` implementation. [#66106](https://github.com/ClickHouse/ClickHouse/pull/66106) ([Nikita Taranov](https://github.com/nickitat)).
+* Backported in [#66451](https://github.com/ClickHouse/ClickHouse/issues/66451): Fixed a bug in ZooKeeper client: a session could get stuck in unusable state after receiving a hardware error from ZooKeeper. For example, this might happen due to "soft memory limit" in ClickHouse Keeper. [#66140](https://github.com/ClickHouse/ClickHouse/pull/66140) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Backported in [#66222](https://github.com/ClickHouse/ClickHouse/issues/66222): Fix issue in SumIfToCountIfVisitor and signed integers. [#66146](https://github.com/ClickHouse/ClickHouse/pull/66146) ([Raúl Marín](https://github.com/Algunenano)).
+* Backported in [#66676](https://github.com/ClickHouse/ClickHouse/issues/66676): Fix handling limit for `system.numbers_mt` when no index can be used. [#66231](https://github.com/ClickHouse/ClickHouse/pull/66231) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).
+* Backported in [#66602](https://github.com/ClickHouse/ClickHouse/issues/66602): Fixed how the ClickHouse server detects the maximum number of usable CPU cores as specified by cgroups v2 if the server runs in a container such as Docker. In more detail, containers often run their process in the root cgroup which has an empty name. In that case, ClickHouse ignored the CPU limits set by cgroups v2. [#66237](https://github.com/ClickHouse/ClickHouse/pull/66237) ([filimonov](https://github.com/filimonov)).
+* Backported in [#66356](https://github.com/ClickHouse/ClickHouse/issues/66356): Fix the `Not-ready set` error when a subquery with `IN` is used in the constraint. [#66261](https://github.com/ClickHouse/ClickHouse/pull/66261) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Backported in [#66970](https://github.com/ClickHouse/ClickHouse/issues/66970): Fix `Column identifier is already registered` error with `group_by_use_nulls=true` and new analyzer. [#66400](https://github.com/ClickHouse/ClickHouse/pull/66400) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Backported in [#66967](https://github.com/ClickHouse/ClickHouse/issues/66967): Fix `Cannot find column` error for queries with constant expression in `GROUP BY` key and new analyzer enabled. [#66433](https://github.com/ClickHouse/ClickHouse/pull/66433) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Backported in [#66718](https://github.com/ClickHouse/ClickHouse/issues/66718): Correctly track memory for `Allocator::realloc`. [#66548](https://github.com/ClickHouse/ClickHouse/pull/66548) ([Antonio Andelic](https://github.com/antonio2368)).
+* Backported in [#66949](https://github.com/ClickHouse/ClickHouse/issues/66949): Fix an invalid result for queries with `WINDOW`. This could happen when `PARTITION` columns have sparse serialization and window functions are executed in parallel. [#66579](https://github.com/ClickHouse/ClickHouse/pull/66579) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Backported in [#66946](https://github.com/ClickHouse/ClickHouse/issues/66946): Fix `Method getResultType is not supported for QUERY query node` error when scalar subquery was used as the first argument of IN (with new analyzer). [#66655](https://github.com/ClickHouse/ClickHouse/pull/66655) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Backported in [#67629](https://github.com/ClickHouse/ClickHouse/issues/67629): Fix for occasional deadlock in Context::getDDLWorker. [#66843](https://github.com/ClickHouse/ClickHouse/pull/66843) ([Alexander Gololobov](https://github.com/davenger)).
+* Backported in [#67193](https://github.com/ClickHouse/ClickHouse/issues/67193): TRUNCATE DATABASE used to stop replication as if it was a DROP DATABASE query, it's fixed. [#67129](https://github.com/ClickHouse/ClickHouse/pull/67129) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Backported in [#67375](https://github.com/ClickHouse/ClickHouse/issues/67375): Fix error `Cannot convert column because it is non constant in source stream but must be constant in result.` for a query that reads from the `Merge` table over the `Distriburted` table with one shard. [#67146](https://github.com/ClickHouse/ClickHouse/pull/67146) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Backported in [#67572](https://github.com/ClickHouse/ClickHouse/issues/67572): Fix execution of nested short-circuit functions. [#67520](https://github.com/ClickHouse/ClickHouse/pull/67520) ([Kruglov Pavel](https://github.com/Avogar)).
+
+#### NOT FOR CHANGELOG / INSIGNIFICANT
+
+* Backported in [#66422](https://github.com/ClickHouse/ClickHouse/issues/66422): Ignore subquery for IN in DDLLoadingDependencyVisitor. [#66395](https://github.com/ClickHouse/ClickHouse/pull/66395) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Backported in [#66855](https://github.com/ClickHouse/ClickHouse/issues/66855): Fix data race in S3::ClientCache. [#66644](https://github.com/ClickHouse/ClickHouse/pull/66644) ([Konstantin Morozov](https://github.com/k-morozov)).
+* Backported in [#67055](https://github.com/ClickHouse/ClickHouse/issues/67055): Increase asio pool size in case the server is tiny. [#66761](https://github.com/ClickHouse/ClickHouse/pull/66761) ([alesapin](https://github.com/alesapin)).
+* Backported in [#66943](https://github.com/ClickHouse/ClickHouse/issues/66943): Small fix in realloc memory tracking. [#66820](https://github.com/ClickHouse/ClickHouse/pull/66820) ([Antonio Andelic](https://github.com/antonio2368)).
+ diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv index 7b5dcda82e3..24488066190 100644 --- a/utils/list-versions/version_date.tsv +++ b/utils/list-versions/version_date.tsv @@ -10,6 +10,7 @@ v24.4.4.113-stable 2024-08-02 v24.4.3.25-stable 2024-06-14 v24.4.2.141-stable 2024-06-07 v24.4.1.2088-stable 2024-05-01 +v24.3.6.48-lts 2024-08-02 v24.3.5.46-lts 2024-07-03 v24.3.4.147-lts 2024-06-13 v24.3.3.102-lts 2024-05-01 From 9c7464e0653782af385dbc884dd3acecfc69c6cc Mon Sep 17 00:00:00 2001 From: Nikita Fomichev Date: Fri, 2 Aug 2024 16:04:11 +0200 Subject: [PATCH 611/661] Stateless tests: reduce pure_http_client timeout to get reasons of timed out tests --- tests/queries/0_stateless/helpers/pure_http_client.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/queries/0_stateless/helpers/pure_http_client.py b/tests/queries/0_stateless/helpers/pure_http_client.py index 0e7a4d27f4f..7a8efec36bb 100644 --- a/tests/queries/0_stateless/helpers/pure_http_client.py +++ b/tests/queries/0_stateless/helpers/pure_http_client.py @@ -18,7 +18,7 @@ class ClickHouseClient: self.host = host def query( - self, query, connection_timeout=1500, settings=dict(), binary_result=False + self, query, connection_timeout=500, settings=dict(), binary_result=False ): NUMBER_OF_TRIES = 30 DELAY = 10 @@ -47,12 +47,12 @@ class ClickHouseClient: else: raise ValueError(r.text) - def query_return_df(self, query, connection_timeout=1500): + def query_return_df(self, query, connection_timeout=500): data = self.query(query, connection_timeout) df = pd.read_csv(io.StringIO(data), sep="\t") return df - def query_with_data(self, query, data, connection_timeout=1500, settings=dict()): + def query_with_data(self, query, data, connection_timeout=500, settings=dict()): params = { "query": query, "timeout_before_checking_execution_speed": 120, From 2c9cef38e56c65ec9bbe7f3af21d4865662f6e9a Mon Sep 17 00:00:00 2001 From: Nikita Fomichev Date: Fri, 2 Aug 2024 16:05:39 +0200 Subject: [PATCH 612/661] Stateless tests: fix hanging tests `02473_multistep_prewhere*` `00411_long_accurate_number_comparison*` --- ...411_long_accurate_number_comparison.python | 19 ++++++------------- .../02473_multistep_prewhere.python | 4 ++-- .../02473_multistep_split_prewhere.python | 4 ++-- .../0_stateless/helpers/pure_http_client.py | 17 ++++++++++++++++- 4 files changed, 26 insertions(+), 18 deletions(-) diff --git a/tests/queries/0_stateless/00411_long_accurate_number_comparison.python b/tests/queries/0_stateless/00411_long_accurate_number_comparison.python index 045de9ee7ee..38b108a696f 100644 --- a/tests/queries/0_stateless/00411_long_accurate_number_comparison.python +++ b/tests/queries/0_stateless/00411_long_accurate_number_comparison.python @@ -2,23 +2,16 @@ import os, itertools, urllib.request, urllib.parse, urllib.error, urllib.request, urllib.error, urllib.parse, sys +CURDIR = os.path.dirname(os.path.realpath(__file__)) +sys.path.insert(0, os.path.join(CURDIR, "helpers")) -def get_ch_answer(query): - return ( - urllib.request.urlopen( - os.environ.get( - "CLICKHOUSE_URL", - "http://localhost:" + os.environ.get("CLICKHOUSE_PORT_HTTP", "8123"), - ), - data=query.encode(), - ) - .read() - .decode() - ) +from pure_http_client import ClickHouseClient + +client = ClickHouseClient() def check_answers(query, answer): - ch_answer = get_ch_answer(query) + ch_answer = client.query(query) if ch_answer.strip() != answer.strip(): print("FAIL on query:", query) print("Expected answer:", answer) diff 
--git a/tests/queries/0_stateless/02473_multistep_prewhere.python b/tests/queries/0_stateless/02473_multistep_prewhere.python index 11095202039..09326b6365d 100644 --- a/tests/queries/0_stateless/02473_multistep_prewhere.python +++ b/tests/queries/0_stateless/02473_multistep_prewhere.python @@ -6,7 +6,7 @@ import sys CURDIR = os.path.dirname(os.path.realpath(__file__)) sys.path.insert(0, os.path.join(CURDIR, "helpers")) -from pure_http_client import ClickHouseClient +from pure_http_client import ClickHouseClient, requests_session_with_retries class Tester: @@ -195,7 +195,7 @@ def main(): default_index_granularity = 10 total_rows = 7 * default_index_granularity step = default_index_granularity - session = requests.Session() + session = requests_session_with_retries() for index_granularity in [ default_index_granularity - 1, default_index_granularity, diff --git a/tests/queries/0_stateless/02473_multistep_split_prewhere.python b/tests/queries/0_stateless/02473_multistep_split_prewhere.python index 19444994fd2..10e94059171 100644 --- a/tests/queries/0_stateless/02473_multistep_split_prewhere.python +++ b/tests/queries/0_stateless/02473_multistep_split_prewhere.python @@ -6,7 +6,7 @@ import sys CURDIR = os.path.dirname(os.path.realpath(__file__)) sys.path.insert(0, os.path.join(CURDIR, "helpers")) -from pure_http_client import ClickHouseClient +from pure_http_client import ClickHouseClient, requests_session_with_retries class Tester: @@ -161,7 +161,7 @@ def main(): default_index_granularity = 10 total_rows = 8 * default_index_granularity step = default_index_granularity - session = requests.Session() + session = requests_session_with_retries() for index_granularity in [default_index_granularity - 1, default_index_granularity]: tester = Tester(session, url, index_granularity, total_rows) # Test combinations of ranges of columns c and d diff --git a/tests/queries/0_stateless/helpers/pure_http_client.py b/tests/queries/0_stateless/helpers/pure_http_client.py index 7a8efec36bb..a31a91e0550 100644 --- a/tests/queries/0_stateless/helpers/pure_http_client.py +++ b/tests/queries/0_stateless/helpers/pure_http_client.py @@ -1,7 +1,8 @@ import os import io -import sys import requests +from requests.adapters import HTTPAdapter +from requests.packages.urllib3.util.retry import Retry import time import pandas as pd @@ -77,3 +78,17 @@ class ClickHouseClient: return result else: raise ValueError(r.text) + + +def requests_session_with_retries(retries=3, timeout=180): + session = requests.Session() + retry = Retry( + total=retries, + read=retries, + connect=retries, + ) + adapter = HTTPAdapter(max_retries=retry) + session.mount("http://", adapter) + session.mount("https://", adapter) + session.timeout = timeout + return session From aefed7cdd62e874f7507afe69d803c9164a283ea Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 2 Aug 2024 16:06:53 +0200 Subject: [PATCH 613/661] Update tests/queries/0_stateless/03164_s3_settings_for_queries_and_merges.sql --- .../0_stateless/03164_s3_settings_for_queries_and_merges.sql | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/queries/0_stateless/03164_s3_settings_for_queries_and_merges.sql b/tests/queries/0_stateless/03164_s3_settings_for_queries_and_merges.sql index 001ef382850..a6932e0536c 100644 --- a/tests/queries/0_stateless/03164_s3_settings_for_queries_and_merges.sql +++ b/tests/queries/0_stateless/03164_s3_settings_for_queries_and_merges.sql @@ -21,6 +21,7 @@ SYSTEM DROP MARK CACHE; SELECT count() FROM t_compact_bytes_s3 WHERE NOT ignore(c2, c4); SYSTEM FLUSH 
LOGS; +-- Errors in S3 requests will be automatically retried, however ProfileEvents can be wrong. That is why we subtract errors. SELECT ProfileEvents['S3ReadRequestsCount'] - ProfileEvents['S3ReadRequestsErrors'], ProfileEvents['ReadBufferFromS3Bytes'] < ProfileEvents['ReadCompressedBytes'] * 1.1 From 664e131f4f2e46fc216305c440e840a5a5784328 Mon Sep 17 00:00:00 2001 From: Nikita Fomichev Date: Fri, 2 Aug 2024 16:09:48 +0200 Subject: [PATCH 614/661] Integration tests: fix ports clashing problem 4 --- tests/integration/conftest.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index b4c86a1cd2f..aa235118aed 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -1,4 +1,6 @@ #!/usr/bin/env python3 +# pylint: disable=unused-argument +# pylint: disable=broad-exception-raised import logging import os From b3e2ce695514d4d314ed8ac1ecdb111c5f94ac7d Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 31 Jul 2024 17:38:20 +0200 Subject: [PATCH 615/661] Minor change --- src/Databases/DatabaseOnDisk.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Databases/DatabaseOnDisk.cpp b/src/Databases/DatabaseOnDisk.cpp index f419f5811a1..734f354d9a5 100644 --- a/src/Databases/DatabaseOnDisk.cpp +++ b/src/Databases/DatabaseOnDisk.cpp @@ -313,7 +313,7 @@ void DatabaseOnDisk::detachTablePermanently(ContextPtr query_context, const Stri std::lock_guard lock(mutex); if (const auto it = snapshot_detached_tables.find(table_name); it == snapshot_detached_tables.end()) { - throw Exception(ErrorCodes::LOGICAL_ERROR, "Snapshot doesn't contain info about detached table={}", table_name); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Snapshot doesn't contain info about detached table `{}`", table_name); } else { From 2110b765d19ce4e68d0c23eab219e199aec0ea96 Mon Sep 17 00:00:00 2001 From: Lennard Eijsackers Date: Fri, 2 Aug 2024 17:53:12 +0200 Subject: [PATCH 616/661] Style check fix + adding debug info to query output Signed-off-by: Lennard Eijsackers --- src/Functions/bitSlice.cpp | 2 -- .../03214_bitslice_argument_evaluation.reference | 11 +++++++++++ .../03214_bitslice_argument_evaluation.sql | 1 + 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/src/Functions/bitSlice.cpp b/src/Functions/bitSlice.cpp index f1d3bb57221..f24473351ae 100644 --- a/src/Functions/bitSlice.cpp +++ b/src/Functions/bitSlice.cpp @@ -18,9 +18,7 @@ using namespace GatherUtils; namespace ErrorCodes { extern const int ILLEGAL_COLUMN; - extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int ZERO_ARRAY_OR_TUPLE_INDEX; - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; } class FunctionBitSlice : public IFunction diff --git a/tests/queries/0_stateless/03214_bitslice_argument_evaluation.reference b/tests/queries/0_stateless/03214_bitslice_argument_evaluation.reference index e69de29bb2d..1731dfa0d79 100644 --- a/tests/queries/0_stateless/03214_bitslice_argument_evaluation.reference +++ b/tests/queries/0_stateless/03214_bitslice_argument_evaluation.reference @@ -0,0 +1,11 @@ +-- { echo } +-- No arguments passed +SELECT bitSlice(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +-- Invalid 1st argument passed +SELECT bitSlice(1, 1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +-- Valid 1st argument, invalid 2nd argument passed +SELECT bitSlice('Hello', 'World'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +-- Valid 1st argument & 2nd argument, invalid 3rd argument passed +SELECT bitSlice('Hello', 1, 'World'); 
-- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +-- More arguments then expected +SELECT bitSlice('Hello', 1, 1, 'World'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } diff --git a/tests/queries/0_stateless/03214_bitslice_argument_evaluation.sql b/tests/queries/0_stateless/03214_bitslice_argument_evaluation.sql index b8488600fcb..1731dfa0d79 100644 --- a/tests/queries/0_stateless/03214_bitslice_argument_evaluation.sql +++ b/tests/queries/0_stateless/03214_bitslice_argument_evaluation.sql @@ -1,3 +1,4 @@ +-- { echo } -- No arguments passed SELECT bitSlice(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } -- Invalid 1st argument passed From 364622f567028ffc70785b681fc246d7151eef04 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 31 Jul 2024 17:38:20 +0200 Subject: [PATCH 617/661] Minor change --- src/Databases/DatabaseOnDisk.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Databases/DatabaseOnDisk.cpp b/src/Databases/DatabaseOnDisk.cpp index f419f5811a1..734f354d9a5 100644 --- a/src/Databases/DatabaseOnDisk.cpp +++ b/src/Databases/DatabaseOnDisk.cpp @@ -313,7 +313,7 @@ void DatabaseOnDisk::detachTablePermanently(ContextPtr query_context, const Stri std::lock_guard lock(mutex); if (const auto it = snapshot_detached_tables.find(table_name); it == snapshot_detached_tables.end()) { - throw Exception(ErrorCodes::LOGICAL_ERROR, "Snapshot doesn't contain info about detached table={}", table_name); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Snapshot doesn't contain info about detached table `{}`", table_name); } else { From fcb0ce7361f74dd8d97a3007f77248f293b2ce5f Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 2 Aug 2024 18:17:32 +0200 Subject: [PATCH 618/661] Fix docs build --- docker/docs/builder/run.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/docker/docs/builder/run.sh b/docker/docs/builder/run.sh index 01c15cb4b0f..d73adb5d279 100755 --- a/docker/docs/builder/run.sh +++ b/docker/docs/builder/run.sh @@ -26,7 +26,6 @@ sed -i '/onBrokenMarkdownLinks:/ s/ignore/error/g' docusaurus.config.js if [[ $# -lt 1 ]] || [[ "$1" == "--"* ]]; then export CI=true - yarn install exec yarn build "$@" fi From cebb3668380f65187b201e638013df40f8ac8ada Mon Sep 17 00:00:00 2001 From: Max K Date: Fri, 2 Aug 2024 09:23:40 +0200 Subject: [PATCH 619/661] more fixes --- .github/actions/check_workflow/action.yml | 21 ++++ .github/workflows/create_release.yml | 27 +++-- .github/workflows/pull_request.yml | 9 +- tests/ci/artifactory.py | 6 +- tests/ci/auto_release.py | 6 +- tests/ci/ci.py | 4 +- tests/ci/ci_buddy.py | 30 +++-- tests/ci/ci_cache.py | 14 +-- tests/ci/ci_config.py | 3 +- tests/ci/ci_definitions.py | 3 +- tests/ci/ci_metadata.py | 4 +- tests/ci/ci_utils.py | 30 ++++- tests/ci/create_release.py | 131 ++++++++++++++-------- tests/ci/docker_server.py | 73 +++--------- tests/ci/test_docker.py | 46 +------- 15 files changed, 213 insertions(+), 194 deletions(-) create mode 100644 .github/actions/check_workflow/action.yml diff --git a/.github/actions/check_workflow/action.yml b/.github/actions/check_workflow/action.yml new file mode 100644 index 00000000000..19a3cec76f5 --- /dev/null +++ b/.github/actions/check_workflow/action.yml @@ -0,0 +1,21 @@ +name: CheckWorkflowResults + +description: Check overall workflow status and post error to slack if any + +inputs: + needs: + description: github needs context as a json string + required: true + type: string + +runs: + using: "composite" + steps: + - name: Check Workflow + shell: bash + run: | + export 
WORKFLOW_RESULT_FILE="/tmp/workflow_results.json" + cat > "$WORKFLOW_RESULT_FILE" << 'EOF' + ${{ inputs.needs }} + EOF + python3 ./tests/ci/ci_buddy.py --check-wf-status diff --git a/.github/workflows/create_release.yml b/.github/workflows/create_release.yml index e27db1b09a4..29094cc51a6 100644 --- a/.github/workflows/create_release.yml +++ b/.github/workflows/create_release.yml @@ -24,7 +24,7 @@ concurrency: dry-run: description: 'Dry run' required: false - default: true + default: false type: boolean jobs: @@ -43,16 +43,27 @@ jobs: - name: Prepare Release Info shell: bash run: | + if [ ${{ inputs.only-repo }} == "true" ]; then + git tag -l ${{ inputs.ref }} || { echo "With only-repo option ref must be a valid release tag"; exit 1; } + fi python3 ./tests/ci/create_release.py --prepare-release-info \ - --ref ${{ inputs.ref }} --release-type ${{ inputs.type }} ${{ inputs.dry-run == true && '--dry-run' || '' }} + --ref ${{ inputs.ref }} --release-type ${{ inputs.type }} \ + ${{ inputs.dry-run == true && '--dry-run' || '' }} \ + ${{ inputs.only-repo == true && '--skip-tag-check' || '' }} echo "::group::Release Info" python3 -m json.tool /tmp/release_info.json echo "::endgroup::" release_tag=$(jq -r '.release_tag' /tmp/release_info.json) commit_sha=$(jq -r '.commit_sha' /tmp/release_info.json) + is_latest=$(jq -r '.latest' /tmp/release_info.json) echo "Release Tag: $release_tag" echo "RELEASE_TAG=$release_tag" >> "$GITHUB_ENV" echo "COMMIT_SHA=$commit_sha" >> "$GITHUB_ENV" + if [ "$is_latest" == "true" ]; then + echo "DOCKER_TAG_TYPE=release-latest" >> "$GITHUB_ENV" + else + echo "DOCKER_TAG_TYPE=release" >> "$GITHUB_ENV" + fi - name: Download All Release Artifacts if: ${{ inputs.type == 'patch' }} shell: bash @@ -85,10 +96,11 @@ jobs: echo "Generate ChangeLog" export CI=1 docker run -u "${UID}:${GID}" -e PYTHONUNBUFFERED=1 -e CI=1 --network=host \ - --volume=".:/ClickHouse" clickhouse/style-test \ - /ClickHouse/tests/ci/changelog.py -v --debug-helpers \ + --volume=".:/wd" --workdir="/wd" \ + clickhouse/style-test \ + ./tests/ci/changelog.py -v --debug-helpers \ --jobs=5 \ - --output="/ClickHouse/docs/changelogs/${{ env.RELEASE_TAG }}.md" ${{ env.RELEASE_TAG }} + --output="./docs/changelogs/${{ env.RELEASE_TAG }}.md" ${{ env.RELEASE_TAG }} git add ./docs/changelogs/${{ env.RELEASE_TAG }}.md echo "Generate Security" python3 ./utils/security-generator/generate_security.py > SECURITY.md @@ -160,7 +172,7 @@ jobs: cd "./tests/ci" python3 ./create_release.py --set-progress-started --progress "docker server release" export CHECK_NAME="Docker server image" - python3 docker_server.py --release-type auto --version ${{ env.RELEASE_TAG }} --check-name "$CHECK_NAME" --sha ${{ env.COMMIT_SHA }} ${{ ! inputs.dry-run && '--push' || '' }} + python3 docker_server.py --tag-type ${{ env.DOCKER_TAG_TYPE }} --version ${{ env.RELEASE_TAG }} --check-name "$CHECK_NAME" --sha ${{ env.COMMIT_SHA }} ${{ ! inputs.dry-run && '--push' || '' }} python3 ./create_release.py --set-progress-completed - name: Docker clickhouse/clickhouse-keeper building if: ${{ inputs.type == 'patch' }} @@ -169,7 +181,7 @@ jobs: cd "./tests/ci" python3 ./create_release.py --set-progress-started --progress "docker keeper release" export CHECK_NAME="Docker keeper image" - python3 docker_server.py --release-type auto --version ${{ env.RELEASE_TAG }} --check-name "$CHECK_NAME" --sha ${{ env.COMMIT_SHA }} ${{ ! 
inputs.dry-run && '--push' || '' }} + python3 docker_server.py --tag-type ${{ env.DOCKER_TAG_TYPE }} --version ${{ env.RELEASE_TAG }} --check-name "$CHECK_NAME" --sha ${{ env.COMMIT_SHA }} ${{ ! inputs.dry-run && '--push' || '' }} python3 ./create_release.py --set-progress-completed - name: Update release info. Merge created PRs shell: bash @@ -178,6 +190,7 @@ jobs: - name: Set current Release progress to Completed with OK shell: bash run: | + # dummy stage to finalize release info with "progress: completed; status: OK" python3 ./tests/ci/create_release.py --set-progress-started --progress "completed" python3 ./tests/ci/create_release.py --set-progress-completed - name: Post Slack Message diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index 04bef1460a6..071f0f1e20a 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -172,12 +172,9 @@ jobs: cd "$GITHUB_WORKSPACE/tests/ci" python3 merge_pr.py --set-ci-status --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }} - name: Check Workflow results - run: | - export WORKFLOW_RESULT_FILE="/tmp/workflow_results.json" - cat > "$WORKFLOW_RESULT_FILE" << 'EOF' - ${{ toJson(needs) }} - EOF - python3 ./tests/ci/ci_buddy.py --check-wf-status + uses: ./.github/actions/check_workflow + with: + needs: ${{ toJson(needs) }} ################################# Stage Final ################################# # diff --git a/tests/ci/artifactory.py b/tests/ci/artifactory.py index 8bba7bca30e..f3d7d24f717 100644 --- a/tests/ci/artifactory.py +++ b/tests/ci/artifactory.py @@ -158,7 +158,7 @@ class DebianArtifactory: print("Running test command:") print(f" {cmd}") assert Shell.check(cmd) - self.release_info.debian_command = debian_command + self.release_info.debian = debian_command self.release_info.dump() @@ -240,7 +240,7 @@ class RpmArtifactory: print("Running test command:") print(f" {cmd}") assert Shell.check(cmd) - self.release_info.rpm_command = rpm_command + self.release_info.rpm = rpm_command self.release_info.dump() @@ -304,7 +304,7 @@ class TgzArtifactory: expected_checksum == actual_checksum ), f"[{actual_checksum} != {expected_checksum}]" Shell.check("rm /tmp/tmp.tgz*", verbose=True) - self.release_info.tgz_command = cmd + self.release_info.tgz = cmd self.release_info.dump() diff --git a/tests/ci/auto_release.py b/tests/ci/auto_release.py index 6c17b4c74ad..3cc88634004 100644 --- a/tests/ci/auto_release.py +++ b/tests/ci/auto_release.py @@ -127,15 +127,13 @@ def _prepare(token): ) commit_num -= 1 - is_completed = CI.GHActions.check_wf_completed( - token=token, commit_sha=commit - ) + is_completed = CI.GH.check_wf_completed(token=token, commit_sha=commit) if not is_completed: print(f"CI is in progress for [{commit}] - check previous commit") commits_to_branch_head += 1 continue - commit_ci_status = CI.GHActions.get_commit_status_by_name( + commit_ci_status = CI.GH.get_commit_status_by_name( token=token, commit_sha=commit, status_name=(CI.JobNames.BUILD_CHECK, "ClickHouse build check"), diff --git a/tests/ci/ci.py b/tests/ci/ci.py index 935fe472e50..2565c8944e4 100644 --- a/tests/ci/ci.py +++ b/tests/ci/ci.py @@ -16,7 +16,7 @@ import upload_result_helper from build_check import get_release_or_pr from ci_config import CI from ci_metadata import CiMetadata -from ci_utils import GHActions, normalize_string, Utils +from ci_utils import GH, normalize_string, Utils from clickhouse_helper import ( CiLogsCredentials, ClickHouseHelper, @@ -368,7 +368,7 @@ def 
_pre_action(s3, job_name, batch, indata, pr_info): ) to_be_skipped = True # skip_status = SUCCESS already there - GHActions.print_in_group("Commit Status Data", job_status) + GH.print_in_group("Commit Status Data", job_status) # create pre report jr = JobReport.create_pre_report(status=skip_status, job_skipped=to_be_skipped) diff --git a/tests/ci/ci_buddy.py b/tests/ci/ci_buddy.py index 138909c1db0..f0e73e925fe 100644 --- a/tests/ci/ci_buddy.py +++ b/tests/ci/ci_buddy.py @@ -8,7 +8,7 @@ import requests from botocore.exceptions import ClientError from pr_info import PRInfo -from ci_utils import Shell, GHActions +from ci_config import CI class CIBuddy: @@ -31,10 +31,19 @@ class CIBuddy: self.sha = pr_info.sha[:10] def check_workflow(self): - GHActions.print_workflow_results() - res = GHActions.get_workflow_job_result(GHActions.ActionsNames.RunConfig) - if res != GHActions.ActionStatuses.SUCCESS: - self.post_job_error("Workflow Configuration Failed", critical=True) + CI.GH.print_workflow_results() + if CI.Envs.GITHUB_WORKFLOW == CI.WorkFlowNames.CreateRelease: + if not CI.GH.is_workflow_ok(): + self.post_job_error( + f"{CI.Envs.GITHUB_WORKFLOW} Workflow Failed", critical=True + ) + else: + res = CI.GH.get_workflow_job_result(CI.GH.ActionsNames.RunConfig) + if res != CI.GH.ActionStatuses.SUCCESS: + print(f"ERROR: RunConfig status is [{res}] - post report to slack") + self.post_job_error( + f"{CI.Envs.GITHUB_WORKFLOW} Workflow Failed", critical=True + ) @staticmethod def _get_webhooks(): @@ -74,10 +83,13 @@ class CIBuddy: message = title if isinstance(body, dict): for name, value in body.items(): - if "commit_sha" in name: + if "sha" in name and value and len(value) == 40: value = ( f"" ) + elif isinstance(value, str) and value.startswith("https://github.com/"): + value_shorten = value.split("/")[-1] + value = f"<{value}|{value_shorten}>" message += f" *{name}*: {value}\n" else: message += body + "\n" @@ -120,9 +132,11 @@ class CIBuddy: ) -> None: instance_id, instance_type = "unknown", "unknown" if with_instance_info: - instance_id = Shell.get_output("ec2metadata --instance-id") or instance_id + instance_id = ( + CI.Shell.get_output("ec2metadata --instance-id") or instance_id + ) instance_type = ( - Shell.get_output("ec2metadata --instance-type") or instance_type + CI.Shell.get_output("ec2metadata --instance-type") or instance_type ) if not job_name: job_name = os.getenv("CHECK_NAME", "unknown") diff --git a/tests/ci/ci_cache.py b/tests/ci/ci_cache.py index 85eabb84f9f..4846233ab03 100644 --- a/tests/ci/ci_cache.py +++ b/tests/ci/ci_cache.py @@ -7,7 +7,7 @@ from typing import Dict, Optional, Any, Union, Sequence, List, Set from ci_config import CI -from ci_utils import is_hex, GHActions +from ci_utils import is_hex, GH from commit_status_helper import CommitStatusData from env_helper import ( TEMP_PATH, @@ -258,15 +258,15 @@ class CiCache: def print_status(self): print(f"Cache enabled: [{self.enabled}]") for record_type in self.RecordType: - GHActions.print_in_group( + GH.print_in_group( f"Cache records: [{record_type}]", list(self.records[record_type]) ) - GHActions.print_in_group( + GH.print_in_group( "Jobs to do:", list(self.jobs_to_do.items()), ) - GHActions.print_in_group("Jobs to skip:", self.jobs_to_skip) - GHActions.print_in_group( + GH.print_in_group("Jobs to skip:", self.jobs_to_skip) + GH.print_in_group( "Jobs to wait:", list(self.jobs_to_wait.items()), ) @@ -788,7 +788,7 @@ class CiCache: while round_cnt < MAX_ROUNDS_TO_WAIT: round_cnt += 1 - GHActions.print_in_group( + 
GH.print_in_group( f"Wait pending jobs, round [{round_cnt}/{MAX_ROUNDS_TO_WAIT}]:", list(self.jobs_to_wait), ) @@ -853,7 +853,7 @@ class CiCache: # make up for 2 iterations in dry_run expired_sec += int(TIMEOUT / 2) + 1 - GHActions.print_in_group( + GH.print_in_group( "Remaining jobs:", [list(self.jobs_to_wait)], ) diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index a7df884a091..c031ca9b805 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -34,7 +34,8 @@ class CI: from ci_definitions import Runners as Runners from ci_utils import Envs as Envs from ci_utils import Utils as Utils - from ci_utils import GHActions as GHActions + from ci_utils import GH as GH + from ci_utils import Shell as Shell from ci_definitions import Labels as Labels from ci_definitions import TRUSTED_CONTRIBUTORS as TRUSTED_CONTRIBUTORS from ci_definitions import WorkFlowNames as WorkFlowNames diff --git a/tests/ci/ci_definitions.py b/tests/ci/ci_definitions.py index 51de8c63509..de6791acda8 100644 --- a/tests/ci/ci_definitions.py +++ b/tests/ci/ci_definitions.py @@ -112,6 +112,7 @@ class WorkFlowNames(metaclass=WithIter): """ JEPSEN = "JepsenWorkflow" + CreateRelease = "CreateRelease" class BuildNames(metaclass=WithIter): @@ -578,7 +579,7 @@ class CommonJobConfigs: DOCKER_SERVER = JobConfig( job_name_keyword="docker", required_on_release_branch=True, - run_command='docker_server.py --check-name "$CHECK_NAME" --release-type head --allow-build-reuse', + run_command='docker_server.py --check-name "$CHECK_NAME" --tag-type head --allow-build-reuse', digest=DigestConfig( include_paths=[ "tests/ci/docker_server.py", diff --git a/tests/ci/ci_metadata.py b/tests/ci/ci_metadata.py index a767d102811..67106262634 100644 --- a/tests/ci/ci_metadata.py +++ b/tests/ci/ci_metadata.py @@ -9,7 +9,7 @@ from env_helper import ( S3_BUILDS_BUCKET_PUBLIC, ) from s3_helper import S3Helper -from ci_utils import GHActions +from ci_utils import GH from synchronizer_utils import SYNC_BRANCH_PREFIX @@ -111,7 +111,7 @@ class CiMetadata: else: log_title = f"Storing workflow metadata: PR [{self.pr_number}], upstream PR [{self.upstream_pr_number}]" - GHActions.print_in_group( + GH.print_in_group( log_title, [f"run_id: {self.run_id}"], ) diff --git a/tests/ci/ci_utils.py b/tests/ci/ci_utils.py index 4f696a2c55a..dae1520afb6 100644 --- a/tests/ci/ci_utils.py +++ b/tests/ci/ci_utils.py @@ -16,6 +16,8 @@ class Envs: WORKFLOW_RESULT_FILE = os.getenv( "WORKFLOW_RESULT_FILE", "/tmp/workflow_results.json" ) + S3_BUILDS_BUCKET = os.getenv("S3_BUILDS_BUCKET", "clickhouse-builds") + GITHUB_WORKFLOW = os.getenv("GITHUB_WORKFLOW", "") LABEL_CATEGORIES = { @@ -83,7 +85,7 @@ def normalize_string(string: str) -> str: return res -class GHActions: +class GH: class ActionsNames: RunConfig = "RunConfig" @@ -117,6 +119,14 @@ class GHActions: results = [f"{job}: {data['result']}" for job, data in res.items()] cls.print_in_group("Workflow results", results) + @classmethod + def is_workflow_ok(cls) -> bool: + res = cls._get_workflow_results() + for _job, data in res.items(): + if data["result"] == "failure": + return False + return bool(res) + @classmethod def get_workflow_job_result(cls, wf_job_name: str) -> Optional[str]: res = cls._get_workflow_results() @@ -189,15 +199,25 @@ class GHActions: return False @staticmethod - def get_pr_url_by_branch(repo, branch): - get_url_cmd = ( - f"gh pr list --repo {repo} --head {branch} --json url --jq '.[0].url'" - ) + def get_pr_url_by_branch(branch, repo=None): + repo = repo or Envs.GITHUB_REPOSITORY + 
get_url_cmd = f"gh pr list --repo {repo} --head {branch} --json url --jq '.[0].url' --state open" url = Shell.get_output(get_url_cmd) + if not url: + print(f"WARNING: No open PR found, branch [{branch}] - search for merged") + get_url_cmd = f"gh pr list --repo {repo} --head {branch} --json url --jq '.[0].url' --state merged" + url = Shell.get_output(get_url_cmd) if not url: print(f"ERROR: PR nor found, branch [{branch}]") return url + @staticmethod + def is_latest_release_branch(branch): + latest_branch = Shell.get_output( + 'gh pr list --label release --repo ClickHouse/ClickHouse --search "sort:created" -L1 --json headRefName' + ) + return latest_branch == branch + class Shell: @classmethod diff --git a/tests/ci/create_release.py b/tests/ci/create_release.py index c407a74fbf0..b4e08f29dbe 100755 --- a/tests/ci/create_release.py +++ b/tests/ci/create_release.py @@ -10,9 +10,8 @@ from typing import Iterator, List from git_helper import Git, GIT_PREFIX from ssh import SSHAgent -from env_helper import GITHUB_REPOSITORY, S3_BUILDS_BUCKET from s3_helper import S3Helper -from ci_utils import Shell, GHActions +from ci_utils import Shell, GH from ci_buddy import CIBuddy from version_helper import ( FILE_WITH_VERSION_PATH, @@ -69,13 +68,14 @@ class ReleaseContextManager: previous_release_tag="NA", previous_release_sha="NA", release_progress=ReleaseProgress.STARTED, + latest=False, ).dump() else: # fetch release info from fs and update self.release_info = ReleaseInfo.from_file() assert self.release_info assert ( - self.release_info.progress_description == ReleaseProgressDescription.OK + self.release_info.progress_status == ReleaseProgressDescription.OK ), "Must be OK on the start of new context" self.release_info.release_progress = self.release_progress self.release_info.dump() @@ -84,9 +84,9 @@ class ReleaseContextManager: def __exit__(self, exc_type, exc_value, traceback): assert self.release_info if exc_type is not None: - self.release_info.progress_description = ReleaseProgressDescription.FAILED + self.release_info.progress_status = ReleaseProgressDescription.FAILED else: - self.release_info.progress_description = ReleaseProgressDescription.OK + self.release_info.progress_status = ReleaseProgressDescription.OK self.release_info.dump() @@ -96,6 +96,7 @@ class ReleaseInfo: release_tag: str release_branch: str commit_sha: str + latest: bool # lts or stable codename: str previous_release_tag: str @@ -104,12 +105,12 @@ class ReleaseInfo: version_bump_pr: str = "" prs_merged: bool = False release_url: str = "" - debian_command: str = "" - rpm_command: str = "" - tgz_command: str = "" - docker_command: str = "" + debian: str = "" + rpm: str = "" + tgz: str = "" + docker: str = "" release_progress: str = "" - progress_description: str = "" + progress_status: str = "" def is_patch(self): return self.release_branch != "master" @@ -129,12 +130,15 @@ class ReleaseInfo: print(json.dumps(dataclasses.asdict(self), indent=2), file=f) return self - def prepare(self, commit_ref: str, release_type: str) -> "ReleaseInfo": + def prepare( + self, commit_ref: str, release_type: str, skip_tag_check: bool + ) -> "ReleaseInfo": version = None release_branch = None release_tag = None previous_release_tag = None previous_release_sha = None + latest_release = False codename = "" assert release_type in ("patch", "new") if release_type == "new": @@ -145,7 +149,7 @@ class ReleaseInfo: verbose=True, ) with checkout(commit_ref): - commit_sha = Shell.get_output_or_raise(f"git rev-parse {commit_ref}") + commit_sha = 
Shell.get_output_or_raise(f"git rev-list -n1 {commit_ref}") # Git() must be inside "with checkout" contextmanager git = Git() version = get_version_from_repo(git=git) @@ -158,12 +162,12 @@ class ReleaseInfo: release_tag = version.describe previous_release_tag = expected_prev_tag previous_release_sha = Shell.get_output_or_raise( - f"git rev-parse {previous_release_tag}" + f"git rev-list -n1 {previous_release_tag}" ) assert previous_release_sha if release_type == "patch": with checkout(commit_ref): - commit_sha = Shell.get_output_or_raise(f"git rev-parse {commit_ref}") + commit_sha = Shell.get_output_or_raise(f"git rev-list -n1 {commit_ref}") # Git() must be inside "with checkout" contextmanager git = Git() version = get_version_from_repo(git=git) @@ -200,16 +204,20 @@ class ReleaseInfo: expected_tag_prefix ) and git.latest_tag.endswith(expected_tag_suffix): pass - else: + elif not skip_tag_check: assert ( False - ), f"BUG: Unexpected latest tag [{git.latest_tag}] expected [{expected_tag_prefix}*{expected_tag_suffix}]" + ), f"BUG: Unexpected latest tag [{git.latest_tag}] expected [{expected_tag_prefix}*{expected_tag_suffix}]. Already Released?" previous_release_sha = Shell.get_output_or_raise( - f"git rev-parse {previous_release_tag}" + f"git rev-list -n1 {previous_release_tag}" ) assert previous_release_sha + if CI.GH.is_latest_release_branch(release_branch): + print("This is going to be the latest release!") + latest_release = True + assert ( release_branch and previous_release_tag @@ -218,7 +226,7 @@ class ReleaseInfo: and release_tag and version and (codename in ("lts", "stable") or release_type == "new") - ) + ), f"Check: {release_branch}, {previous_release_tag}, {previous_release_sha}, {commit_sha}, {release_tag}, {version}" self.release_branch = release_branch self.commit_sha = commit_sha @@ -228,7 +236,8 @@ class ReleaseInfo: self.previous_release_tag = previous_release_tag self.previous_release_sha = previous_release_sha self.release_progress = ReleaseProgress.STARTED - self.progress_description = ReleaseProgressDescription.OK + self.progress_status = ReleaseProgressDescription.OK + self.latest = latest_release return self def push_release_tag(self, dry_run: bool) -> None: @@ -252,7 +261,7 @@ class ReleaseInfo: @staticmethod def _create_gh_label(label: str, color_hex: str, dry_run: bool) -> None: - cmd = f"gh api repos/{GITHUB_REPOSITORY}/labels -f name={label} -f color={color_hex}" + cmd = f"gh api repos/{CI.Envs.GITHUB_REPOSITORY}/labels -f name={label} -f color={color_hex}" Shell.check(cmd, dry_run=dry_run, strict=True) def push_new_release_branch(self, dry_run: bool) -> None: @@ -294,7 +303,7 @@ class ReleaseInfo: f"v{new_release_branch}-affected", "c2bfff", dry_run=dry_run ) Shell.check( - f"""gh pr create --repo {GITHUB_REPOSITORY} --title 'Release pull request for branch {new_release_branch}' + f"""gh pr create --repo {CI.Envs.GITHUB_REPOSITORY} --title 'Release pull request for branch {new_release_branch}' --head {new_release_branch} {pr_labels} --body 'This PullRequest is a part of ClickHouse release cycle. It is used by CI system only. Do not perform any changes with it.' 
""", @@ -303,9 +312,12 @@ class ReleaseInfo: verbose=True, ) + def get_version_bump_branch(self): + return f"bump_version_{self.version}" + def update_version_and_contributors_list(self, dry_run: bool) -> None: # Bump version, update contributors list, create PR - branch_upd_version_contributors = f"bump_version_{self.version}" + branch_upd_version_contributors = self.get_version_bump_branch() with checkout(self.commit_sha): git = Git() version = get_version_from_repo(git=git) @@ -323,9 +335,9 @@ class ReleaseInfo: update_contributors(raise_error=True) cmd_commit_version_upd = f"{GIT_PREFIX} commit '{CMAKE_PATH}' '{CONTRIBUTORS_PATH}' -m 'Update autogenerated version to {self.version} and contributors'" cmd_push_branch = f"{GIT_PREFIX} push --set-upstream origin {branch_upd_version_contributors}" - body_file = get_abs_path(".github/PULL_REQUEST_TEMPLATE.md") actor = os.getenv("GITHUB_ACTOR", "") or "me" - cmd_create_pr = f"gh pr create --repo {GITHUB_REPOSITORY} --title 'Update version after release' --head {branch_upd_version_contributors} --base {self.release_branch} --body-file {body_file} --label 'do not test' --assignee {actor}" + body = f"Automatic version bump after release {self.release_tag}\n### Changelog category (leave one):\n- Not for changelog (changelog entry is not required)\n" + cmd_create_pr = f"gh pr create --repo {CI.Envs.GITHUB_REPOSITORY} --title 'Update version after release' --head {branch_upd_version_contributors} --base {self.release_branch} --body \"{body}\" --assignee {actor}" Shell.check( cmd_commit_version_upd, strict=True, dry_run=dry_run, verbose=True ) @@ -342,30 +354,42 @@ class ReleaseInfo: ) self.version_bump_pr = "dry-run" else: - self.version_bump_pr = GHActions.get_pr_url_by_branch( - repo=GITHUB_REPOSITORY, branch=branch_upd_version_contributors + self.version_bump_pr = GH.get_pr_url_by_branch( + branch=branch_upd_version_contributors ) + def get_change_log_branch(self): + return f"auto/{self.release_tag}" + def update_release_info(self, dry_run: bool) -> "ReleaseInfo": if self.release_branch != "master": - branch = f"auto/{release_info.release_tag}" - if not dry_run: - url = GHActions.get_pr_url_by_branch( - repo=GITHUB_REPOSITORY, branch=branch - ) - else: - url = "dry-run" - print(f"ChangeLog PR url [{url}]") - self.changelog_pr = url - print(f"Release url [{url}]") - self.release_url = f"https://github.com/{GITHUB_REPOSITORY}/releases/tag/{self.release_tag}" - if self.release_progress == ReleaseProgress.COMPLETED: - self.docker_command = f"docker run --rm clickhouse/clickhouse:{self.version} clickhouse --version" + if not self.changelog_pr: + branch = self.get_change_log_branch() + if not dry_run: + url = GH.get_pr_url_by_branch(branch=branch) + else: + url = "dry-run" + print(f"ChangeLog PR url [{url}]") + self.changelog_pr = url + + if not self.version_bump_pr: + branch = self.get_version_bump_branch() + if not dry_run: + url = GH.get_pr_url_by_branch(branch=branch) + else: + url = "dry-run" + print(f"Version bump PR url [{url}]") + self.version_bump_pr = url + + self.release_url = f"https://github.com/{CI.Envs.GITHUB_REPOSITORY}/releases/tag/{self.release_tag}" + print(f"Release url [{self.release_url}]") + + self.docker = f"docker run --rm clickhouse/clickhouse:{self.version} clickhouse --version" self.dump() return self def create_gh_release(self, packages_files: List[str], dry_run: bool) -> None: - repo = os.getenv("GITHUB_REPOSITORY") + repo = CI.Envs.GITHUB_REPOSITORY assert repo cmds = [ f"gh release create --repo {repo} --title 'Release 
{self.release_tag}' {self.release_tag}" @@ -375,7 +399,9 @@ class ReleaseInfo: if not dry_run: for cmd in cmds: Shell.check(cmd, strict=True, verbose=True) - self.release_url = f"https://github.com/{GITHUB_REPOSITORY}/releases/tag/{self.release_tag}" + self.release_url = ( + f"https://github.com/{repo}/releases/tag/{self.release_tag}" + ) else: print("Dry-run, would run commands:") print("\n * ".join(cmds)) @@ -536,7 +562,7 @@ class PackageDownloader: ] ) self.s3.download_file( - bucket=S3_BUILDS_BUCKET, + bucket=CI.Envs.S3_BUILDS_BUCKET, s3_path=s3_path, local_file_path="/".join([self.LOCAL_DIR, package_file]), ) @@ -557,7 +583,7 @@ class PackageDownloader: ] ) self.s3.download_file( - bucket=S3_BUILDS_BUCKET, + bucket=CI.Envs.S3_BUILDS_BUCKET, s3_path=s3_path, local_file_path="/".join([self.LOCAL_DIR, destination_binary_name]), ) @@ -636,6 +662,11 @@ def parse_args() -> argparse.Namespace: action="store_true", help="Initial step to prepare info like release branch, release tag, etc.", ) + parser.add_argument( + "--skip-tag-check", + action="store_true", + help="To skip check against latest git tag on a release branch", + ) parser.add_argument( "--push-release-tag", action="store_true", @@ -725,7 +756,11 @@ if __name__ == "__main__": assert ( args.ref and args.release_type ), "--ref and --release-type must be provided with --prepare-release-info" - release_info.prepare(commit_ref=args.ref, release_type=args.release_type) + release_info.prepare( + commit_ref=args.ref, + release_type=args.release_type, + skip_tag_check=args.skip_tag_check, + ) if args.download_packages: with ReleaseContextManager( @@ -776,7 +811,7 @@ if __name__ == "__main__": else: title = "New release" if ( - release_info.progress_description == ReleaseProgressDescription.OK + release_info.progress_status == ReleaseProgressDescription.OK and release_info.release_progress == ReleaseProgress.COMPLETED ): title = "Completed: " + title @@ -792,16 +827,16 @@ if __name__ == "__main__": if args.set_progress_started: ri = ReleaseInfo.from_file() ri.release_progress = args.progress - ri.progress_description = ReleaseProgressDescription.FAILED + ri.progress_status = ReleaseProgressDescription.FAILED ri.dump() assert args.progress, "Progress step name must be provided" if args.set_progress_completed: ri = ReleaseInfo.from_file() assert ( - ri.progress_description == ReleaseProgressDescription.FAILED + ri.progress_status == ReleaseProgressDescription.FAILED ), "Must be FAILED before set to OK" - ri.progress_description = ReleaseProgressDescription.OK + ri.progress_status = ReleaseProgressDescription.OK ri.dump() if args.merge_prs: diff --git a/tests/ci/docker_server.py b/tests/ci/docker_server.py index 3e782c079c6..8f0474d5053 100644 --- a/tests/ci/docker_server.py +++ b/tests/ci/docker_server.py @@ -69,13 +69,14 @@ def parse_args() -> argparse.Namespace: help="sha of the commit to use packages from", ) parser.add_argument( - "--release-type", + "--tag-type", type=str, - choices=("auto", "latest", "major", "minor", "patch", "head"), + choices=("head", "release", "latest-release"), default="head", - help="version part that will be updated when '--version' is set; " - "'auto' is a special case, it will get versions from github and detect the " - "release type (latest, major, minor or patch) automatically", + help="defines required tags for resulting docker image. 
" + "head - for master image (tag: head) " + "release - for release image (tags: XX, XX.XX, XX.XX.XX, XX.XX.XX.XX) " + "release-latest - for latest release image (tags: XX, XX.XX, XX.XX.XX, XX.XX.XX.XX, latest) ", ) parser.add_argument( "--image-path", @@ -149,74 +150,35 @@ def retry_popen(cmd: str, log_file: Path) -> int: return retcode -def auto_release_type(version: ClickHouseVersion, release_type: str) -> str: - if release_type != "auto": - return release_type - - git_versions = get_tagged_versions() - reference_version = git_versions[0] - for i in reversed(range(len(git_versions))): - if git_versions[i] <= version: - if i == len(git_versions) - 1: - return "latest" - reference_version = git_versions[i + 1] - break - - if version.major < reference_version.major: - return "major" - if version.minor < reference_version.minor: - return "minor" - if version.patch < reference_version.patch: - return "patch" - - raise ValueError( - "Release type 'tweak' is not supported for " - f"{version.string} < {reference_version.string}" - ) - - -def gen_tags(version: ClickHouseVersion, release_type: str) -> List[str]: +def gen_tags(version: ClickHouseVersion, tag_type: str) -> List[str]: """ - 22.2.2.2 + latest: + @tag_type release-latest, @version 22.2.2.2: - latest - 22 - 22.2 - 22.2.2 - 22.2.2.2 - 22.2.2.2 + major: + @tag_type release, @version 22.2.2.2: - 22 - 22.2 - 22.2.2 - 22.2.2.2 - 22.2.2.2 + minor: - - 22.2 - - 22.2.2 - - 22.2.2.2 - 22.2.2.2 + patch: - - 22.2.2 - - 22.2.2.2 - 22.2.2.2 + head: + @tag_type head: - head """ parts = version.string.split(".") tags = [] - if release_type == "latest": - tags.append(release_type) + if tag_type == "release-latest": + tags.append("latest") for i in range(len(parts)): tags.append(".".join(parts[: i + 1])) - elif release_type == "major": + elif tag_type == "head": + tags.append(tag_type) + elif tag_type == "release": for i in range(len(parts)): tags.append(".".join(parts[: i + 1])) - elif release_type == "minor": - for i in range(1, len(parts)): - tags.append(".".join(parts[: i + 1])) - elif release_type == "patch": - for i in range(2, len(parts)): - tags.append(".".join(parts[: i + 1])) - elif release_type == "head": - tags.append(release_type) else: - raise ValueError(f"{release_type} is not valid release part") + assert False, f"Invalid release type [{tag_type}]" return tags @@ -370,8 +332,7 @@ def main(): push = True image = DockerImageData(image_path, image_repo, False) - args.release_type = auto_release_type(args.version, args.release_type) - tags = gen_tags(args.version, args.release_type) + tags = gen_tags(args.version, args.tag_type) repo_urls = {} direct_urls: Dict[str, List[str]] = {} diff --git a/tests/ci/test_docker.py b/tests/ci/test_docker.py index 662143bfd9b..58ebe4ecbb1 100644 --- a/tests/ci/test_docker.py +++ b/tests/ci/test_docker.py @@ -1,61 +1,19 @@ #!/usr/bin/env python import unittest -from unittest.mock import patch, MagicMock from version_helper import get_version_from_string import docker_server as ds -# di.logging.basicConfig(level=di.logging.INFO) - class TestDockerServer(unittest.TestCase): def test_gen_tags(self): version = get_version_from_string("22.2.2.2") cases = ( - ("latest", ["latest", "22", "22.2", "22.2.2", "22.2.2.2"]), - ("major", ["22", "22.2", "22.2.2", "22.2.2.2"]), - ("minor", ["22.2", "22.2.2", "22.2.2.2"]), - ("patch", ["22.2.2", "22.2.2.2"]), + ("release-latest", ["latest", "22", "22.2", "22.2.2", "22.2.2.2"]), + ("release", ["22", "22.2", "22.2.2", "22.2.2.2"]), ("head", ["head"]), ) for case in cases: 
release_type = case[0] self.assertEqual(case[1], ds.gen_tags(version, release_type)) - - with self.assertRaises(ValueError): - ds.gen_tags(version, "auto") - - @patch("docker_server.get_tagged_versions") - def test_auto_release_type(self, mock_tagged_versions: MagicMock) -> None: - mock_tagged_versions.return_value = [ - get_version_from_string("1.1.1.1"), - get_version_from_string("1.2.1.1"), - get_version_from_string("2.1.1.1"), - get_version_from_string("2.2.1.1"), - get_version_from_string("2.2.2.1"), - ] - - cases_less = ( - (get_version_from_string("1.0.1.1"), "minor"), - (get_version_from_string("1.1.2.1"), "minor"), - (get_version_from_string("1.3.1.1"), "major"), - (get_version_from_string("2.1.2.1"), "minor"), - (get_version_from_string("2.2.1.3"), "patch"), - (get_version_from_string("2.2.3.1"), "latest"), - (get_version_from_string("2.3.1.1"), "latest"), - ) - for case in cases_less: - release = ds.auto_release_type(case[0], "auto") - self.assertEqual(case[1], release) - - cases_equal = ( - (get_version_from_string("1.1.1.1"), "minor"), - (get_version_from_string("1.2.1.1"), "major"), - (get_version_from_string("2.1.1.1"), "minor"), - (get_version_from_string("2.2.1.1"), "patch"), - (get_version_from_string("2.2.2.1"), "latest"), - ) - for case in cases_equal: - release = ds.auto_release_type(case[0], "auto") - self.assertEqual(case[1], release) From ae6d4bdd8aa2b010f27dd8ebbb6f816362eb2a9e Mon Sep 17 00:00:00 2001 From: Sasha Sheikin Date: Mon, 29 Jul 2024 10:31:35 +0200 Subject: [PATCH 620/661] Fix positionCaseInsensitive example --- .../en/sql-reference/functions/string-search-functions.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/en/sql-reference/functions/string-search-functions.md b/docs/en/sql-reference/functions/string-search-functions.md index b7ba1d4feb7..e9ff7ebf33b 100644 --- a/docs/en/sql-reference/functions/string-search-functions.md +++ b/docs/en/sql-reference/functions/string-search-functions.md @@ -150,15 +150,15 @@ A case insensitive invariant of [position](#position). 
Query: ``` sql -SELECT position('Hello, world!', 'hello'); +SELECT positionCaseInsensitive('Hello, world!', 'hello'); ``` Result: ``` text -┌─position('Hello, world!', 'hello')─┐ -│ 0 │ -└────────────────────────────────────┘ +┌─positionCaseInsensitive('Hello, world!', 'hello')─┐ +│ 1 │ +└───────────────────────────────────────────────────┘ ``` ## positionUTF8 From b38c46a87d6eacbc7805562deb07ce586fd7e0fb Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 31 Jul 2024 17:38:20 +0200 Subject: [PATCH 621/661] Minor change --- src/Databases/DatabaseOnDisk.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Databases/DatabaseOnDisk.cpp b/src/Databases/DatabaseOnDisk.cpp index f419f5811a1..734f354d9a5 100644 --- a/src/Databases/DatabaseOnDisk.cpp +++ b/src/Databases/DatabaseOnDisk.cpp @@ -313,7 +313,7 @@ void DatabaseOnDisk::detachTablePermanently(ContextPtr query_context, const Stri std::lock_guard lock(mutex); if (const auto it = snapshot_detached_tables.find(table_name); it == snapshot_detached_tables.end()) { - throw Exception(ErrorCodes::LOGICAL_ERROR, "Snapshot doesn't contain info about detached table={}", table_name); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Snapshot doesn't contain info about detached table `{}`", table_name); } else { From e3239c6ee11eb5bf0466fb750c58125868885ec8 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 2 Aug 2024 19:09:16 +0200 Subject: [PATCH 622/661] Fix bad log message in JIT for sorting --- src/Core/SortDescription.cpp | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/Core/SortDescription.cpp b/src/Core/SortDescription.cpp index 9edc79a1ff1..1b3f81f8547 100644 --- a/src/Core/SortDescription.cpp +++ b/src/Core/SortDescription.cpp @@ -103,7 +103,15 @@ static std::string getSortDescriptionDump(const SortDescription & description, c WriteBufferFromOwnString buffer; for (size_t i = 0; i < description.size(); ++i) - buffer << header_types[i]->getName() << ' ' << description[i].direction << ' ' << description[i].nulls_direction; + { + if (i != 0) + buffer << ", "; + + buffer << "(type: " << header_types[i]->getName() + << ", direction: " << description[i].direction + << ", nulls_direction: " << description[i].nulls_direction + << ")"; + } return buffer.str(); } From 3c4389ec4d78db55ce742e5d5a3b0ed050c9c9e6 Mon Sep 17 00:00:00 2001 From: Denny Crane Date: Fri, 2 Aug 2024 14:57:19 -0300 Subject: [PATCH 623/661] doc/fix max_partitions_to_read description --- .../operations/settings/merge-tree-settings.md | 7 ++----- .../en/operations/settings/query-complexity.md | 18 ++++++++++++++++-- 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/docs/en/operations/settings/merge-tree-settings.md b/docs/en/operations/settings/merge-tree-settings.md index 7278b91f90d..67fa45c20cd 100644 --- a/docs/en/operations/settings/merge-tree-settings.md +++ b/docs/en/operations/settings/merge-tree-settings.md @@ -119,11 +119,6 @@ Minimum size of blocks of uncompressed data required for compression when writin You can also specify this setting in the global settings (see [min_compress_block_size](/docs/en/operations/settings/settings.md/#min-compress-block-size) setting). The value specified when table is created overrides the global value for this setting. -## max_partitions_to_read - -Limits the maximum number of partitions that can be accessed in one query. 
-You can also specify setting [max_partitions_to_read](/docs/en/operations/settings/merge-tree-settings.md/#max-partitions-to-read) in the global setting. - ## max_suspicious_broken_parts If the number of broken parts in a single partition exceeds the `max_suspicious_broken_parts` value, automatic deletion is denied. @@ -691,6 +686,8 @@ Possible values: Default value: -1 (unlimited). +You can also specify a query complexity setting [max_partitions_to_read](query-complexity#max-partitions-to-read) at a query / session / profile level. + ## min_age_to_force_merge_seconds {#min_age_to_force_merge_seconds} Merge parts if every part in the range is older than the value of `min_age_to_force_merge_seconds`. diff --git a/docs/en/operations/settings/query-complexity.md b/docs/en/operations/settings/query-complexity.md index 2a20e74e20f..14ccb1167f9 100644 --- a/docs/en/operations/settings/query-complexity.md +++ b/docs/en/operations/settings/query-complexity.md @@ -188,7 +188,7 @@ If you set `timeout_before_checking_execution_speed `to 0, ClickHouse will use c What to do if the query is run longer than `max_execution_time` or the estimated running time is longer than `max_estimated_execution_time`: `throw` or `break`. By default, `throw`. -# max_execution_time_leaf +## max_execution_time_leaf Similar semantic to `max_execution_time` but only apply on leaf node for distributed or remote queries. @@ -204,7 +204,7 @@ We can use `max_execution_time_leaf` as the query settings: SELECT count() FROM cluster(cluster, view(SELECT * FROM t)) SETTINGS max_execution_time_leaf = 10; ``` -# timeout_overflow_mode_leaf +## timeout_overflow_mode_leaf What to do when the query in leaf node run longer than `max_execution_time_leaf`: `throw` or `break`. By default, `throw`. @@ -426,3 +426,17 @@ Example: ``` Default value: 0 (Infinite count of simultaneous sessions). + +## max_partitions_to_read {#max-partitions-to-read} + +Limits the maximum number of partitions that can be accessed in one query. + +The setting value specified when the table is created can be overridden via query-level setting. + +Possible values: + +- Any positive integer. + +Default value: -1 (unlimited). + +You can also specify a MergeTree setting [max_partitions_to_read](merge-tree-settings#max-partitions-to-read) in tables' setting. From ce39957983af8bdd7d97e4a3729e2f97d3e0cb85 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 2 Aug 2024 20:11:25 +0200 Subject: [PATCH 624/661] Remove capitalization in test reports --- tests/ci/report.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/tests/ci/report.py b/tests/ci/report.py index f50ed4c1f85..3f0fc596824 100644 --- a/tests/ci/report.py +++ b/tests/ci/report.py @@ -293,9 +293,9 @@ class JobReport: start_time: str duration: float additional_files: Union[Sequence[str], Sequence[Path]] - # clickhouse version, build job only + # ClickHouse version, build job only version: str = "" - # checkname to set in commit status, set if differs from jjob name + # check_name to be set in commit status, set it if it differs from the job name check_name: str = "" # directory with artifacts to upload on s3 build_dir_for_upload: Union[Path, str] = "" @@ -667,11 +667,7 @@ ColorTheme = Tuple[str, str, str] def _format_header( header: str, branch_name: str, branch_url: Optional[str] = None ) -> str: - # Following line does not lower CI->Ci and SQLancer->Sqlancer. 
It only - # capitalizes the first letter and doesn't touch the rest of the word - result = " ".join([w[0].upper() + w[1:] for w in header.split(" ") if w]) - result = result.replace("Clickhouse", "ClickHouse") - result = result.replace("clickhouse", "ClickHouse") + result = header if "ClickHouse" not in result: result = f"ClickHouse {result}" if branch_url: From 2c9b61d047c1afe22b0fa0a967a87db8bd4cf62f Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 2 Aug 2024 20:16:44 +0200 Subject: [PATCH 625/661] Miscellaneous --- tests/ci/ci.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/ci/ci.py b/tests/ci/ci.py index 935fe472e50..6ca84a346e2 100644 --- a/tests/ci/ci.py +++ b/tests/ci/ci.py @@ -1019,7 +1019,7 @@ def _get_ext_check_name(check_name: str) -> str: return check_name_with_group -def _cancel_pr_wf(s3: S3Helper, pr_number: int, cancel_sync: bool = False) -> None: +def _cancel_pr_workflow(s3: S3Helper, pr_number: int, cancel_sync: bool = False) -> None: wf_data = CiMetadata(s3, pr_number).fetch_meta() if not cancel_sync: if not wf_data.run_id: @@ -1368,12 +1368,12 @@ def main() -> int: assert indata, "Run config must be provided via --infile" _update_gh_statuses_action(indata=indata, s3=s3) - ### CANCEL PREVIOUS WORKFLOW RUN + ### CANCEL THE PREVIOUS WORKFLOW RUN elif args.cancel_previous_run: if pr_info.is_merge_queue: - _cancel_pr_wf(s3, pr_info.merged_pr) + _cancel_pr_workflow(s3, pr_info.merged_pr) elif pr_info.is_pr: - _cancel_pr_wf(s3, pr_info.number, cancel_sync=True) + _cancel_pr_workflow(s3, pr_info.number, cancel_sync=True) else: assert False, "BUG! Not supported scenario" From bd3606dac4954c673ec6c38dd6fbdb70bc7b53cc Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 2 Aug 2024 20:17:07 +0200 Subject: [PATCH 626/661] Fix typos --- tests/ci/commit_status_helper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ci/commit_status_helper.py b/tests/ci/commit_status_helper.py index fdc9c002b66..908ac4a7dca 100644 --- a/tests/ci/commit_status_helper.py +++ b/tests/ci/commit_status_helper.py @@ -301,7 +301,7 @@ def get_worst_state(statuses: CommitStatuses) -> StatusType: def create_ci_report(pr_info: PRInfo, statuses: CommitStatuses) -> str: - """The function converst the statuses to TestResults and uploads the report + """The function converts the statuses to TestResults and uploads the report to S3 tests bucket. 
Then it returns the URL""" test_results = [] # type: TestResults for status in statuses: From 675afda17210ca7e8e71e0899a5ed14d7227fb55 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 2 Aug 2024 20:22:08 +0200 Subject: [PATCH 627/661] Fix check names in the CI Logs database --- tests/ci/clickhouse_helper.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/ci/clickhouse_helper.py b/tests/ci/clickhouse_helper.py index 287970cce9a..0725f7100d1 100644 --- a/tests/ci/clickhouse_helper.py +++ b/tests/ci/clickhouse_helper.py @@ -3,6 +3,7 @@ import fileinput import json import logging import time +import os from pathlib import Path from typing import Any, Dict, List, Optional @@ -298,6 +299,11 @@ class CiLogsCredentials: def get_docker_arguments( self, pr_info: PRInfo, check_start_time: str, check_name: str ) -> str: + run_by_hash_total = int(os.getenv("RUN_BY_HASH_TOTAL", "0")) + if run_by_hash_total > 1: + run_by_hash_num = int(os.getenv("RUN_BY_HASH_NUM", "0")) + check_name = f"{check_name} [{run_by_hash_num + 1}/{run_by_hash_total}]" + self.create_ci_logs_credentials() if not self.config_path.exists(): logging.info("Do not use external logs pushing") From b282be83c500bac5544424378b9505fc8c28e432 Mon Sep 17 00:00:00 2001 From: Max K Date: Fri, 2 Aug 2024 20:03:43 +0200 Subject: [PATCH 628/661] remove old workflows --- .github/actions/release/action.yml | 166 --------------------------- .github/workflows/auto_release.yml | 111 ------------------ .github/workflows/create_release.yml | 2 + .github/workflows/release.yml | 71 ------------ .github/workflows/tags_stable.yml | 74 ------------ tests/ci/docker_server.py | 1 - 6 files changed, 2 insertions(+), 423 deletions(-) delete mode 100644 .github/actions/release/action.yml delete mode 100644 .github/workflows/auto_release.yml delete mode 100644 .github/workflows/release.yml delete mode 100644 .github/workflows/tags_stable.yml diff --git a/.github/actions/release/action.yml b/.github/actions/release/action.yml deleted file mode 100644 index a287aa8b41d..00000000000 --- a/.github/actions/release/action.yml +++ /dev/null @@ -1,166 +0,0 @@ -name: Release - -description: Makes patch releases and creates new release branch - -inputs: - ref: - description: 'Git reference (branch or commit sha) from which to create the release' - required: true - type: string - type: - description: 'The type of release: "new" for a new release or "patch" for a patch release' - required: true - type: choice - options: - - patch - - new - dry-run: - description: 'Dry run' - required: true - type: boolean - token: - required: true - type: string - -runs: - using: "composite" - steps: - - name: Prepare Release Info - shell: bash - run: | - python3 ./tests/ci/create_release.py --prepare-release-info \ - --ref ${{ inputs.ref }} --release-type ${{ inputs.type }} ${{ inputs.dry-run == true && '--dry-run' || '' }} - echo "::group::Release Info" - python3 -m json.tool /tmp/release_info.json - echo "::endgroup::" - release_tag=$(jq -r '.release_tag' /tmp/release_info.json) - commit_sha=$(jq -r '.commit_sha' /tmp/release_info.json) - echo "Release Tag: $release_tag" - echo "RELEASE_TAG=$release_tag" >> "$GITHUB_ENV" - echo "COMMIT_SHA=$commit_sha" >> "$GITHUB_ENV" - - name: Download All Release Artifacts - if: ${{ inputs.type == 'patch' }} - shell: bash - run: | - python3 ./tests/ci/create_release.py --download-packages ${{ inputs.dry-run == true && '--dry-run' || '' }} - - name: Push Git Tag for the Release - shell: bash - run: | - python3 
./tests/ci/create_release.py --push-release-tag ${{ inputs.dry-run == true && '--dry-run' || '' }} - - name: Push New Release Branch - if: ${{ inputs.type == 'new' }} - shell: bash - run: | - python3 ./tests/ci/create_release.py --push-new-release-branch ${{ inputs.dry-run == true && '--dry-run' || '' }} - - name: Bump CH Version and Update Contributors' List - shell: bash - run: | - python3 ./tests/ci/create_release.py --create-bump-version-pr ${{ inputs.dry-run == true && '--dry-run' || '' }} - - name: Bump Docker versions, Changelog, Security - if: ${{ inputs.type == 'patch' }} - shell: bash - run: | - git checkout master - python3 ./tests/ci/create_release.py --set-progress-started --progress "update changelog, docker version, security" - echo "List versions" - ./utils/list-versions/list-versions.sh > ./utils/list-versions/version_date.tsv - echo "Update docker version" - ./utils/list-versions/update-docker-version.sh - echo "Generate ChangeLog" - export CI=1 - docker run -u "${UID}:${GID}" -e PYTHONUNBUFFERED=1 -e CI=1 --network=host \ - --volume=".:/ClickHouse" clickhouse/style-test \ - /ClickHouse/tests/ci/changelog.py -v --debug-helpers \ - --gh-user-or-token=${{ inputs.token }} --jobs=5 \ - --output="/ClickHouse/docs/changelogs/${{ env.RELEASE_TAG }}.md" ${{ env.RELEASE_TAG }} - git add ./docs/changelogs/${{ env.RELEASE_TAG }}.md - echo "Generate Security" - python3 ./utils/security-generator/generate_security.py > SECURITY.md - git diff HEAD - - name: Create ChangeLog PR - if: ${{ inputs.type == 'patch' && ! inputs.dry-run }} - uses: peter-evans/create-pull-request@v6 - with: - author: "robot-clickhouse " - token: ${{ inputs.token }} - committer: "robot-clickhouse " - commit-message: Update version_date.tsv and changelogs after ${{ env.RELEASE_TAG }} - branch: auto/${{ env.RELEASE_TAG }} - assignees: ${{ github.event.sender.login }} # assign the PR to the tag pusher - delete-branch: true - title: Update version_date.tsv and changelog after ${{ env.RELEASE_TAG }} - labels: do not test - body: | - Update version_date.tsv and changelogs after ${{ env.RELEASE_TAG }} - ### Changelog category (leave one): - - Not for changelog (changelog entry is not required) - - name: Complete previous steps and Restore git state - if: ${{ inputs.type == 'patch' }} - shell: bash - run: | - python3 ./tests/ci/create_release.py --set-progress-completed - git reset --hard HEAD - git checkout "$GITHUB_REF_NAME" - - name: Create GH Release - shell: bash - if: ${{ inputs.type == 'patch' }} - run: | - python3 ./tests/ci/create_release.py --create-gh-release ${{ inputs.dry-run == true && '--dry-run' || '' }} - - name: Export TGZ Packages - if: ${{ inputs.type == 'patch' }} - shell: bash - run: | - python3 ./tests/ci/artifactory.py --export-tgz ${{ inputs.dry-run == true && '--dry-run' || '' }} - - name: Test TGZ Packages - if: ${{ inputs.type == 'patch' }} - shell: bash - run: | - python3 ./tests/ci/artifactory.py --test-tgz ${{ inputs.dry-run == true && '--dry-run' || '' }} - - name: Export RPM Packages - if: ${{ inputs.type == 'patch' }} - shell: bash - run: | - python3 ./tests/ci/artifactory.py --export-rpm ${{ inputs.dry-run == true && '--dry-run' || '' }} - - name: Test RPM Packages - if: ${{ inputs.type == 'patch' }} - shell: bash - run: | - python3 ./tests/ci/artifactory.py --test-rpm ${{ inputs.dry-run == true && '--dry-run' || '' }} - - name: Export Debian Packages - if: ${{ inputs.type == 'patch' }} - shell: bash - run: | - python3 ./tests/ci/artifactory.py --export-debian ${{ inputs.dry-run == 
true && '--dry-run' || '' }} - - name: Test Debian Packages - if: ${{ inputs.type == 'patch' }} - shell: bash - run: | - python3 ./tests/ci/artifactory.py --test-debian ${{ inputs.dry-run == true && '--dry-run' || '' }} - - name: Docker clickhouse/clickhouse-server building - if: ${{ inputs.type == 'patch' }} - shell: bash - run: | - cd "./tests/ci" - python3 ./create_release.py --set-progress-started --progress "docker server release" - export CHECK_NAME="Docker server image" - python3 docker_server.py --release-type auto --version ${{ env.RELEASE_TAG }} --check-name "$CHECK_NAME" --sha ${{ env.COMMIT_SHA }} ${{ ! inputs.dry-run && '--push' || '' }} - python3 ./create_release.py --set-progress-completed - - name: Docker clickhouse/clickhouse-keeper building - if: ${{ inputs.type == 'patch' }} - shell: bash - run: | - cd "./tests/ci" - python3 ./create_release.py --set-progress-started --progress "docker keeper release" - export CHECK_NAME="Docker keeper image" - python3 docker_server.py --release-type auto --version ${{ env.RELEASE_TAG }} --check-name "$CHECK_NAME" --sha ${{ env.COMMIT_SHA }} ${{ ! inputs.dry-run && '--push' || '' }} - python3 ./create_release.py --set-progress-completed - - name: Set current Release progress to Completed with OK - shell: bash - run: | - python3 ./tests/ci/create_release.py --set-progress-started --progress "completed" - python3 ./tests/ci/create_release.py --set-progress-completed - - name: Post Slack Message - if: ${{ !cancelled() }} - shell: bash - run: | - python3 ./tests/ci/create_release.py --post-status ${{ inputs.dry-run == true && '--dry-run' || '' }} diff --git a/.github/workflows/auto_release.yml b/.github/workflows/auto_release.yml deleted file mode 100644 index 457ffacc7a8..00000000000 --- a/.github/workflows/auto_release.yml +++ /dev/null @@ -1,111 +0,0 @@ -name: AutoRelease - -env: - PYTHONUNBUFFERED: 1 - DRY_RUN: true - -concurrency: - group: release -on: # yamllint disable-line rule:truthy - # Workflow uses a test bucket for packages and dry run mode (no real releases) - schedule: - - cron: '0 9 * * *' - - cron: '0 15 * * *' - workflow_dispatch: - inputs: - dry-run: - description: 'Dry run' - required: false - default: true - type: boolean - -jobs: - AutoRelease: - runs-on: [self-hosted, release-maker] - steps: - - name: DebugInfo - uses: hmarr/debug-action@f7318c783045ac39ed9bb497e22ce835fdafbfe6 - - name: Set envs - run: | - cat >> "$GITHUB_ENV" << 'EOF' - ROBOT_CLICKHOUSE_SSH_KEY<> "$GITHUB_ENV" - - name: Set DRY_RUN for dispatch - if: ${{ github.event_name == 'workflow_dispatch' }} - run: echo "DRY_RUN=${{ github.event.inputs.dry-run }}" >> "$GITHUB_ENV" - - name: Check out repository code - uses: ClickHouse/checkout@v1 - with: - token: ${{secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN}} - fetch-depth: 0 - - name: Auto Release Prepare - run: | - cd "$GITHUB_WORKSPACE/tests/ci" - python3 auto_release.py --prepare - echo "::group::Auto Release Info" - python3 -m json.tool /tmp/autorelease_info.json - echo "::endgroup::" - { - echo 'AUTO_RELEASE_PARAMS<> "$GITHUB_ENV" - - name: Post Release Branch statuses - run: | - cd "$GITHUB_WORKSPACE/tests/ci" - python3 auto_release.py --post-status - - name: Release ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[0].release_branch }} - if: ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[0] && fromJson(env.AUTO_RELEASE_PARAMS).releases[0].ready }} - uses: ./.github/actions/release - with: - ref: ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[0].commit_sha }} - type: patch - dry-run: ${{ env.DRY_RUN }} - token: 
${{secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN}} - - name: Release ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[1].release_branch }} - if: ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[0] && fromJson(env.AUTO_RELEASE_PARAMS).releases[1].ready }} - uses: ./.github/actions/release - with: - ref: ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[1].commit_sha }} - type: patch - dry-run: ${{ env.DRY_RUN }} - token: ${{secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN}} - - name: Release ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[2].release_branch }} - if: ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[2] && fromJson(env.AUTO_RELEASE_PARAMS).releases[2].ready }} - uses: ./.github/actions/release - with: - ref: ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[2].commit_sha }} - type: patch - dry-run: ${{ env.DRY_RUN }} - token: ${{secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN}} - - name: Release ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[3].release_branch }} - if: ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[3] && fromJson(env.AUTO_RELEASE_PARAMS).releases[3].ready }} - uses: ./.github/actions/release - with: - ref: ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[3].commit_sha }} - type: patch - dry-run: ${{ env.DRY_RUN }} - token: ${{secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN}} - - name: Release ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[4].release_branch }} - if: ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[4] && fromJson(env.AUTO_RELEASE_PARAMS).releases[4].ready }} - uses: ./.github/actions/release - with: - ref: ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[4].commit_sha }} - type: patch - dry-run: ${{ env.DRY_RUN }} - token: ${{secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN}} - - name: Post Slack Message - if: ${{ !cancelled() }} - run: | - cd "$GITHUB_WORKSPACE/tests/ci" - python3 auto_release.py --post-auto-release-complete --wf-status ${{ job.status }} - - name: Clean up - run: | - docker ps --quiet | xargs --no-run-if-empty docker kill ||: - docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: - sudo rm -fr "$TEMP_PATH" diff --git a/.github/workflows/create_release.yml b/.github/workflows/create_release.yml index 29094cc51a6..d4993b373df 100644 --- a/.github/workflows/create_release.yml +++ b/.github/workflows/create_release.yml @@ -89,6 +89,8 @@ jobs: shell: bash run: | python3 ./tests/ci/create_release.py --set-progress-started --progress "update changelog, docker version, security" + + git checkout master # in case WF started from feature branch echo "List versions" ./utils/list-versions/list-versions.sh > ./utils/list-versions/version_date.tsv echo "Update docker version" diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml deleted file mode 100644 index 7dc4e3298a6..00000000000 --- a/.github/workflows/release.yml +++ /dev/null @@ -1,71 +0,0 @@ -name: PublishedReleaseCI -# - Gets artifacts from S3 -# - Sends it to JFROG Artifactory -# - Adds them to the release assets - -on: # yamllint disable-line rule:truthy - release: - types: - - published - workflow_dispatch: - inputs: - tag: - description: 'Release tag' - required: true - type: string - -jobs: - ReleasePublish: - runs-on: [self-hosted, style-checker] - steps: - - name: Set tag from input - if: github.event_name == 'workflow_dispatch' - run: | - echo "GITHUB_TAG=${{ github.event.inputs.tag }}" >> "$GITHUB_ENV" - - name: Set tag from REF - if: github.event_name == 'release' - run: | - echo "GITHUB_TAG=${GITHUB_REF#refs/tags/}" >> "$GITHUB_ENV" - - name: Deploy packages and assets - run: | - curl --silent --data '' 
--no-buffer \ - '${{ secrets.PACKAGES_RELEASE_URL }}/release/'"${GITHUB_TAG}"'?binary=binary_darwin&binary=binary_darwin_aarch64&sync=true' - ############################################################################################ - ##################################### Docker images ####################################### - ############################################################################################ - DockerServerImages: - runs-on: [self-hosted, style-checker] - steps: - - name: Set tag from input - if: github.event_name == 'workflow_dispatch' - run: | - echo "GITHUB_TAG=${{ github.event.inputs.tag }}" >> "$GITHUB_ENV" - - name: Set tag from REF - if: github.event_name == 'release' - run: | - echo "GITHUB_TAG=${GITHUB_REF#refs/tags/}" >> "$GITHUB_ENV" - - name: Check out repository code - uses: ClickHouse/checkout@v1 - with: - clear-repository: true - fetch-depth: 0 # otherwise we will have no version info - filter: tree:0 - ref: ${{ env.GITHUB_TAG }} - - name: Check docker clickhouse/clickhouse-server building - run: | - cd "$GITHUB_WORKSPACE/tests/ci" - export CHECK_NAME="Docker server image" - SHA=$(git rev-list -n 1 "$GITHUB_TAG") - python3 docker_server.py --release-type auto --version "$GITHUB_TAG" --sha "$SHA" --check-name "$CHECK_NAME" --push - - name: Check docker clickhouse/clickhouse-keeper building - run: | - cd "$GITHUB_WORKSPACE/tests/ci" - export CHECK_NAME="Docker keeper image" - SHA=$(git rev-list -n 1 "$GITHUB_TAG") - python3 docker_server.py --release-type auto --version "$GITHUB_TAG" --sha "$SHA" --check-name "$CHECK_NAME" --push - - name: Cleanup - if: always() - run: | - docker ps --quiet | xargs --no-run-if-empty docker kill ||: - docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: - sudo rm -fr "$TEMP_PATH" diff --git a/.github/workflows/tags_stable.yml b/.github/workflows/tags_stable.yml deleted file mode 100644 index 2aa7694bc41..00000000000 --- a/.github/workflows/tags_stable.yml +++ /dev/null @@ -1,74 +0,0 @@ -name: TagsStableWorkflow -# - Gets artifacts from S3 -# - Sends it to JFROG Artifactory -# - Adds them to the release assets - -env: - # Force the stdout and stderr streams to be unbuffered - PYTHONUNBUFFERED: 1 - -on: # yamllint disable-line rule:truthy - push: - tags: - - 'v*-prestable' - - 'v*-stable' - - 'v*-lts' - workflow_dispatch: - inputs: - tag: - description: 'Test tag' - required: true - type: string - - -jobs: - UpdateVersions: - runs-on: [self-hosted, style-checker] - steps: - - name: Set test tag - if: github.event_name == 'workflow_dispatch' - run: | - echo "GITHUB_TAG=${{ github.event.inputs.tag }}" >> "$GITHUB_ENV" - - name: Get tag name - if: github.event_name != 'workflow_dispatch' - run: | - echo "GITHUB_TAG=${GITHUB_REF#refs/tags/}" >> "$GITHUB_ENV" - - name: Check out repository code - uses: ClickHouse/checkout@v1 - with: - ref: master - fetch-depth: 0 - filter: tree:0 - - name: Update versions, docker version, changelog, security - env: - GITHUB_TOKEN: ${{ secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN }} - run: | - ./utils/list-versions/list-versions.sh > ./utils/list-versions/version_date.tsv - ./utils/list-versions/update-docker-version.sh - GID=$(id -g "${UID}") - # --network=host and CI=1 are required for the S3 access from a container - docker run -u "${UID}:${GID}" -e PYTHONUNBUFFERED=1 -e CI=1 --network=host \ - --volume="${GITHUB_WORKSPACE}:/ClickHouse" clickhouse/style-test \ - /ClickHouse/tests/ci/changelog.py -v --debug-helpers \ - --gh-user-or-token="$GITHUB_TOKEN" --jobs=5 \ - 
--output="/ClickHouse/docs/changelogs/${GITHUB_TAG}.md" "${GITHUB_TAG}" - git add "./docs/changelogs/${GITHUB_TAG}.md" - python3 ./utils/security-generator/generate_security.py > SECURITY.md - git diff HEAD - - name: Create Pull Request - uses: peter-evans/create-pull-request@v6 - with: - author: "robot-clickhouse " - token: ${{ secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN }} - committer: "robot-clickhouse " - commit-message: Update version_date.tsv and changelogs after ${{ env.GITHUB_TAG }} - branch: auto/${{ env.GITHUB_TAG }} - assignees: ${{ github.event.sender.login }} # assign the PR to the tag pusher - delete-branch: true - title: Update version_date.tsv and changelogs after ${{ env.GITHUB_TAG }} - labels: do not test - body: | - Update version_date.tsv and changelogs after ${{ env.GITHUB_TAG }} - - ### Changelog category (leave one): - - Not for changelog (changelog entry is not required) diff --git a/tests/ci/docker_server.py b/tests/ci/docker_server.py index 8f0474d5053..3251ec5644e 100644 --- a/tests/ci/docker_server.py +++ b/tests/ci/docker_server.py @@ -27,7 +27,6 @@ from stopwatch import Stopwatch from tee_popen import TeePopen from version_helper import ( ClickHouseVersion, - get_tagged_versions, get_version_from_repo, version_arg, ) From aa38e78d7238d843737d1d268de6ee189c19edc3 Mon Sep 17 00:00:00 2001 From: Max K Date: Fri, 2 Aug 2024 20:27:59 +0200 Subject: [PATCH 629/661] update version_date.tsv --- docs/changelogs/v23.8.16.40-lts.md | 35 ++++++++++++++++++++++++++++ utils/list-versions/version_date.tsv | 1 + 2 files changed, 36 insertions(+) create mode 100644 docs/changelogs/v23.8.16.40-lts.md diff --git a/docs/changelogs/v23.8.16.40-lts.md b/docs/changelogs/v23.8.16.40-lts.md new file mode 100644 index 00000000000..75caf1ea277 --- /dev/null +++ b/docs/changelogs/v23.8.16.40-lts.md @@ -0,0 +1,35 @@ +--- +sidebar_position: 1 +sidebar_label: 2024 +--- + +# 2024 Changelog + +### ClickHouse release v23.8.16.40-lts (e143a9039ba) FIXME as compared to v23.8.15.35-lts (060ff8e813a) + +#### Improvement +* Backported in [#66962](https://github.com/ClickHouse/ClickHouse/issues/66962): Added support for parameterized view with analyzer to not analyze create parameterized view. Refactor existing parameterized view logic to not analyze create parameterized view. [#54211](https://github.com/ClickHouse/ClickHouse/pull/54211) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Backported in [#65461](https://github.com/ClickHouse/ClickHouse/issues/65461): Reload certificate chain during certificate reload. [#61671](https://github.com/ClickHouse/ClickHouse/pull/61671) ([Pervakov Grigorii](https://github.com/GrigoryPervakov)). +* Backported in [#65880](https://github.com/ClickHouse/ClickHouse/issues/65880): Always start Keeper with sufficient amount of threads in global thread pool. [#64444](https://github.com/ClickHouse/ClickHouse/pull/64444) ([Duc Canh Le](https://github.com/canhld94)). +* Backported in [#65912](https://github.com/ClickHouse/ClickHouse/issues/65912): Respect cgroup CPU limit in Keeper. [#65819](https://github.com/ClickHouse/ClickHouse/pull/65819) ([Antonio Andelic](https://github.com/antonio2368)). + +#### Critical Bug Fix (crash, LOGICAL_ERROR, data loss, RBAC) +* Backported in [#65281](https://github.com/ClickHouse/ClickHouse/issues/65281): Fix crash with UniqInjectiveFunctionsEliminationPass and uniqCombined. [#65188](https://github.com/ClickHouse/ClickHouse/pull/65188) ([Raúl Marín](https://github.com/Algunenano)). 
+* Backported in [#65368](https://github.com/ClickHouse/ClickHouse/issues/65368): Fix a bug in ClickHouse Keeper that causes digest mismatch during closing session. [#65198](https://github.com/ClickHouse/ClickHouse/pull/65198) ([Aleksei Filatov](https://github.com/aalexfvk)). +* Backported in [#65743](https://github.com/ClickHouse/ClickHouse/issues/65743): Fix crash in maxIntersections. [#65689](https://github.com/ClickHouse/ClickHouse/pull/65689) ([Raúl Marín](https://github.com/Algunenano)). + +#### Bug Fix (user-visible misbehavior in an official stable release) +* Backported in [#65351](https://github.com/ClickHouse/ClickHouse/issues/65351): Fix possible abort on uncaught exception in ~WriteBufferFromFileDescriptor in StatusFile. [#64206](https://github.com/ClickHouse/ClickHouse/pull/64206) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#66037](https://github.com/ClickHouse/ClickHouse/issues/66037): Fix crash on destroying AccessControl: add explicit shutdown. [#64993](https://github.com/ClickHouse/ClickHouse/pull/64993) ([Vitaly Baranov](https://github.com/vitlibar)). +* Backported in [#65782](https://github.com/ClickHouse/ClickHouse/issues/65782): Fixed bug in MergeJoin. Column in sparse serialisation might be treated as a column of its nested type though the required conversion wasn't performed. [#65632](https://github.com/ClickHouse/ClickHouse/pull/65632) ([Nikita Taranov](https://github.com/nickitat)). +* Backported in [#65926](https://github.com/ClickHouse/ClickHouse/issues/65926): For queries that read from `PostgreSQL`, cancel the internal `PostgreSQL` query if the ClickHouse query is finished. Otherwise, `ClickHouse` query cannot be canceled until the internal `PostgreSQL` query is finished. [#65771](https://github.com/ClickHouse/ClickHouse/pull/65771) ([Maksim Kita](https://github.com/kitaisreal)). +* Backported in [#65822](https://github.com/ClickHouse/ClickHouse/issues/65822): Fix a bug in short circuit logic when old analyzer and dictGetOrDefault is used. [#65802](https://github.com/ClickHouse/ClickHouse/pull/65802) ([jsc0218](https://github.com/jsc0218)). +* Backported in [#66449](https://github.com/ClickHouse/ClickHouse/issues/66449): Fixed a bug in ZooKeeper client: a session could get stuck in unusable state after receiving a hardware error from ZooKeeper. For example, this might happen due to "soft memory limit" in ClickHouse Keeper. [#66140](https://github.com/ClickHouse/ClickHouse/pull/66140) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#66717](https://github.com/ClickHouse/ClickHouse/issues/66717): Correctly track memory for `Allocator::realloc`. [#66548](https://github.com/ClickHouse/ClickHouse/pull/66548) ([Antonio Andelic](https://github.com/antonio2368)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Backported in [#65080](https://github.com/ClickHouse/ClickHouse/issues/65080): Follow up to [#56541](https://github.com/ClickHouse/ClickHouse/issues/56541). [#57141](https://github.com/ClickHouse/ClickHouse/pull/57141) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Backported in [#65913](https://github.com/ClickHouse/ClickHouse/issues/65913): Fix bug with session closing in Keeper. [#65735](https://github.com/ClickHouse/ClickHouse/pull/65735) ([Antonio Andelic](https://github.com/antonio2368)). +* Backported in [#66853](https://github.com/ClickHouse/ClickHouse/issues/66853): Fix data race in S3::ClientCache. 
[#66644](https://github.com/ClickHouse/ClickHouse/pull/66644) ([Konstantin Morozov](https://github.com/k-morozov)). + diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv index 24488066190..cb6b8f588da 100644 --- a/utils/list-versions/version_date.tsv +++ b/utils/list-versions/version_date.tsv @@ -50,6 +50,7 @@ v23.9.4.11-stable 2023-11-08 v23.9.3.12-stable 2023-10-31 v23.9.2.56-stable 2023-10-19 v23.9.1.1854-stable 2023-09-29 +v23.8.16.40-lts 2024-08-02 v23.8.15.35-lts 2024-06-14 v23.8.14.6-lts 2024-05-02 v23.8.13.25-lts 2024-04-26 From a45ba44dbaa2ed43eb63e49fe609a01be978eac9 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Fri, 2 Aug 2024 18:28:38 +0000 Subject: [PATCH 630/661] Automatic style fix --- tests/ci/ci.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/ci/ci.py b/tests/ci/ci.py index 6ca84a346e2..805296d2bb2 100644 --- a/tests/ci/ci.py +++ b/tests/ci/ci.py @@ -1019,7 +1019,9 @@ def _get_ext_check_name(check_name: str) -> str: return check_name_with_group -def _cancel_pr_workflow(s3: S3Helper, pr_number: int, cancel_sync: bool = False) -> None: +def _cancel_pr_workflow( + s3: S3Helper, pr_number: int, cancel_sync: bool = False +) -> None: wf_data = CiMetadata(s3, pr_number).fetch_meta() if not cancel_sync: if not wf_data.run_id: From dd0ae04f90314ce6d5dbe748605e66f1a6d9024f Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 31 Jul 2024 17:38:20 +0200 Subject: [PATCH 631/661] Minor change --- src/Databases/DatabaseOnDisk.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Databases/DatabaseOnDisk.cpp b/src/Databases/DatabaseOnDisk.cpp index f419f5811a1..734f354d9a5 100644 --- a/src/Databases/DatabaseOnDisk.cpp +++ b/src/Databases/DatabaseOnDisk.cpp @@ -313,7 +313,7 @@ void DatabaseOnDisk::detachTablePermanently(ContextPtr query_context, const Stri std::lock_guard lock(mutex); if (const auto it = snapshot_detached_tables.find(table_name); it == snapshot_detached_tables.end()) { - throw Exception(ErrorCodes::LOGICAL_ERROR, "Snapshot doesn't contain info about detached table={}", table_name); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Snapshot doesn't contain info about detached table `{}`", table_name); } else { From a431ab3e4b6f925924a81d99997e6c028ae7950f Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 3 Aug 2024 01:31:02 +0200 Subject: [PATCH 632/661] Improve dashboard --- programs/server/dashboard.html | 66 +++++++++++++++++++++++++++------- 1 file changed, 54 insertions(+), 12 deletions(-) diff --git a/programs/server/dashboard.html b/programs/server/dashboard.html index 45f988f7b1e..71880b9e228 100644 --- a/programs/server/dashboard.html +++ b/programs/server/dashboard.html @@ -17,7 +17,7 @@ --input-shadow-color: rgba(0, 255, 0, 1); --error-color: red; --global-error-color: white; - --legend-background: rgba(255, 255, 255, 0.75); + --legend-background: rgba(255, 255, 0, 0.75); --title-color: #666; --text-color: black; --edit-title-background: #FEE; @@ -41,7 +41,7 @@ --moving-shadow-color: rgba(255, 255, 255, 0.25); --input-shadow-color: rgba(255, 128, 0, 0.25); --error-color: #F66; - --legend-background: rgba(255, 255, 255, 0.25); + --legend-background: rgba(0, 96, 128, 0.75); --title-color: white; --text-color: white; --edit-title-background: #364f69; @@ -1004,14 +1004,14 @@ function legendAsTooltipPlugin({ className, style = { background: "var(--legend- className && legendEl.classList.add(className); uPlot.assign(legendEl.style, { - textAlign: "left", + 
textAlign: "right", pointerEvents: "none", display: "none", position: "absolute", left: 0, top: 0, - zIndex: 100, - boxShadow: "2px 2px 10px rgba(0,0,0,0.1)", + zIndex: 200, + boxShadow: "2px 2px 10px rgba(0, 0, 0, 0.1)", ...style }); @@ -1051,8 +1051,10 @@ function legendAsTooltipPlugin({ className, style = { background: "var(--legend- function update(u) { let { left, top } = u.cursor; - left -= legendEl.clientWidth / 2; - top -= legendEl.clientHeight / 2; + /// This will make the balloon to the right of the cursor when the cursor is on the left side, and vise-versa, + /// avoiding the borders of the chart. + left -= legendEl.clientWidth * (left / u.width); + top -= legendEl.clientHeight; legendEl.style.transform = "translate(" + left + "px, " + top + "px)"; if (multiline) { @@ -1229,14 +1231,53 @@ async function draw(idx, chart, url_params, query) { let sync = uPlot.sync("sync"); - let axis = { + function formatDateTime(t) { + return (new Date(t * 1000)).toISOString().replace('T', '\n').replace('.000Z', ''); + } + + function formatDateTimes(self, ticks) { + return ticks.map((t, idx) => { + let res = formatDateTime(t); + if (idx == 0 || res.substring(0, 10) != formatDateTime(ticks[idx - 1]).substring(0, 10)) { + return res; + } else { + return res.substring(11); + } + }); + } + + function formatValue(v) { + const a = Math.abs(v); + if (a >= 1000000000000000) { return (v / 1000000000000000) + 'P'; } + if (a >= 1000000000000) { return (v / 1000000000000) + 'T'; } + if (a >= 1000000000) { return (v / 1000000000) + 'G'; } + if (a >= 1000000) { return (v / 1000000) + 'M'; } + if (a >= 1000) { return (v / 1000) + 'K'; } + if (a > 0 && a < 0.001) { return (v * 1000000) + "μ"; } + return v; + } + + let axis_x = { stroke: axes_color, grid: { width: 1 / devicePixelRatio, stroke: grid_color }, - ticks: { width: 1 / devicePixelRatio, stroke: grid_color } + ticks: { width: 1 / devicePixelRatio, stroke: grid_color }, + values: formatDateTimes, + space: 80, + incrs: [1, 5, 10, 15, 30, + 60, 60 * 5, 60 * 10, 60 * 15, 60 * 30, + 3600, 3600 * 2, 3600 * 3, 3600 * 4, 3600 * 6, 3600 * 12, + 3600 * 24], }; - let axes = [axis, axis]; - let series = [{ label: "x" }]; + let axis_y = { + stroke: axes_color, + grid: { width: 1 / devicePixelRatio, stroke: grid_color }, + ticks: { width: 1 / devicePixelRatio, stroke: grid_color }, + values: (self, ticks) => ticks.map(formatValue) + }; + + let axes = [axis_x, axis_y]; + let series = [{ label: "time", value: (self, t) => formatDateTime(t) }]; let data = [reply.data[reply.meta[0].name]]; // Treat every column as series @@ -1254,9 +1295,10 @@ async function draw(idx, chart, url_params, query) { const opts = { width: chart.clientWidth, height: chart.clientHeight, + scales: { x: { time: false } }, /// Because we want to split and format time on our own. 
axes, series, - padding: [ null, null, null, (Math.round(max_value * 100) / 100).toString().length * 6 - 10 ], + padding: [ null, null, null, 3 ], plugins: [ legendAsTooltipPlugin() ], cursor: { sync: { From a6f9dd4447cbb475cbf77b07de35b40fbcad50b1 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 3 Aug 2024 01:35:10 +0200 Subject: [PATCH 633/661] Improve dashboard --- programs/server/dashboard.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/programs/server/dashboard.html b/programs/server/dashboard.html index 71880b9e228..c69acec7858 100644 --- a/programs/server/dashboard.html +++ b/programs/server/dashboard.html @@ -1010,7 +1010,7 @@ function legendAsTooltipPlugin({ className, style = { background: "var(--legend- position: "absolute", left: 0, top: 0, - zIndex: 200, + zIndex: 100, boxShadow: "2px 2px 10px rgba(0, 0, 0, 0.1)", ...style }); From 95659de26573bdb17ab2b5649e6dad96fb75c479 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 3 Aug 2024 01:45:41 +0200 Subject: [PATCH 634/661] Fix invalid detection of an empty result --- programs/server/dashboard.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/programs/server/dashboard.html b/programs/server/dashboard.html index c69acec7858..238254f4ef8 100644 --- a/programs/server/dashboard.html +++ b/programs/server/dashboard.html @@ -1141,7 +1141,7 @@ async function draw(idx, chart, url_params, query) { let {reply, error} = await doFetch(query, url_params); if (!error) { - if (reply.rows.length == 0) { + if (reply.rows == 0) { error = "Query returned empty result."; } else if (reply.meta.length < 2) { error = "Query should return at least two columns: unix timestamp and value."; From a99f9bb603f78437fba8d3ebb031c2f41d00cd58 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 3 Aug 2024 01:50:27 +0200 Subject: [PATCH 635/661] Focus on the mass editor --- programs/server/dashboard.html | 1 + 1 file changed, 1 insertion(+) diff --git a/programs/server/dashboard.html b/programs/server/dashboard.html index 238254f4ef8..8fb07d5da3b 100644 --- a/programs/server/dashboard.html +++ b/programs/server/dashboard.html @@ -945,6 +945,7 @@ function showMassEditor() { let editor = document.getElementById('mass-editor-textarea'); editor.value = JSON.stringify({params: params, queries: queries}, null, 2); + editor.focus(); mass_editor_active = true; } From eeb8c1caac9e8e2ba2f3a1a86f5603281e161610 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 3 Aug 2024 02:06:53 +0200 Subject: [PATCH 636/661] Improve margins when there are many parameters --- programs/server/dashboard.html | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/programs/server/dashboard.html b/programs/server/dashboard.html index 8fb07d5da3b..344de779065 100644 --- a/programs/server/dashboard.html +++ b/programs/server/dashboard.html @@ -256,6 +256,7 @@ font-weight: bold; user-select: none; cursor: pointer; + margin-bottom: 1rem; } #run:hover { @@ -309,7 +310,7 @@ color: var(--param-text-color); display: inline-block; box-shadow: 1px 1px 0 var(--shadow-color); - margin-bottom: 1rem; + margin-bottom: 0.5rem; } input:focus { From 090fb59194462324507d75f032aa803303c3e041 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 3 Aug 2024 02:20:58 +0200 Subject: [PATCH 637/661] Automatic field width of chart parameters --- programs/server/dashboard.html | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/programs/server/dashboard.html b/programs/server/dashboard.html index 344de779065..0b099b15536 
100644 --- a/programs/server/dashboard.html +++ b/programs/server/dashboard.html @@ -218,6 +218,7 @@ #chart-params .param { width: 6%; + font-family: monospace; } input { @@ -658,6 +659,10 @@ function insertParam(name, value) { param_value.value = value; param_value.spellcheck = false; + let setWidth = e => { e.style.width = (e.value.length + 1) + 'ch' }; + if (value) { setWidth(param_value); } + param_value.addEventListener('input', e => setWidth(e.target)); + param_wrapper.appendChild(param_name); param_wrapper.appendChild(param_value); document.getElementById('chart-params').appendChild(param_wrapper); From 9a017528a4685fc4ed7eec7ba37f9e9804972c3b Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 31 Jul 2024 17:38:20 +0200 Subject: [PATCH 638/661] Minor change --- src/Databases/DatabaseOnDisk.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Databases/DatabaseOnDisk.cpp b/src/Databases/DatabaseOnDisk.cpp index f419f5811a1..734f354d9a5 100644 --- a/src/Databases/DatabaseOnDisk.cpp +++ b/src/Databases/DatabaseOnDisk.cpp @@ -313,7 +313,7 @@ void DatabaseOnDisk::detachTablePermanently(ContextPtr query_context, const Stri std::lock_guard lock(mutex); if (const auto it = snapshot_detached_tables.find(table_name); it == snapshot_detached_tables.end()) { - throw Exception(ErrorCodes::LOGICAL_ERROR, "Snapshot doesn't contain info about detached table={}", table_name); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Snapshot doesn't contain info about detached table `{}`", table_name); } else { From dfeb1991164bd6c8b0efc8bdcfe9dcd5b8906928 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 3 Aug 2024 03:06:37 +0200 Subject: [PATCH 639/661] Fix locking inside TimerDescriptor --- src/Common/TimerDescriptor.cpp | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/Common/TimerDescriptor.cpp b/src/Common/TimerDescriptor.cpp index 9a171ae9487..ce290a1cb31 100644 --- a/src/Common/TimerDescriptor.cpp +++ b/src/Common/TimerDescriptor.cpp @@ -2,6 +2,7 @@ #include #include +#include #include #include @@ -75,10 +76,22 @@ void TimerDescriptor::drain() const /// or since the last successful read(2), then the buffer given to read(2) returns an unsigned 8-byte integer (uint64_t) /// containing the number of expirations that have occurred. /// (The returned value is in host byte order—that is, the native byte order for integers on the host machine.) + + /// Due to a bug in Linux Kernel, reading from timerfd in non-blocking mode can be still blocking. + /// Avoid it with polling. 
+ Epoll epoll; + epoll.add(timer_fd); + epoll_event event; + event.data.fd = -1; + size_t ready_count = epoll.getManyReady(1, &event, 0); + if (!ready_count) + return; + uint64_t buf; while (true) { ssize_t res = ::read(timer_fd, &buf, sizeof(buf)); + if (res < 0) { /// man timerfd_create: From f97abf69949f8822d70f4b1251e1945f279dd0ec Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 3 Aug 2024 09:35:37 +0200 Subject: [PATCH 640/661] tests: avoid endless wait in 01042_system_reload_dictionary_reloads_completely Signed-off-by: Azat Khuzhin --- ...em_reload_dictionary_reloads_completely.sh | 21 ++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/tests/queries/0_stateless/01042_system_reload_dictionary_reloads_completely.sh b/tests/queries/0_stateless/01042_system_reload_dictionary_reloads_completely.sh index 03dd376f802..ebc4110332f 100755 --- a/tests/queries/0_stateless/01042_system_reload_dictionary_reloads_completely.sh +++ b/tests/queries/0_stateless/01042_system_reload_dictionary_reloads_completely.sh @@ -8,6 +8,18 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) set -e -o pipefail +# Wait when the dictionary will update the value for 13 on its own: +function wait_for_dict_upate() +{ + for ((i = 0; i < 100; ++i)); do + if [ "$(${CLICKHOUSE_CLIENT} --query "SELECT dictGetInt64('${CLICKHOUSE_DATABASE}.dict', 'y', toUInt64(13))")" != -1 ]; then + return 0 + fi + sleep 0.5 + done + return 1 +} + $CLICKHOUSE_CLIENT < ', dictGetInt64('${CLICKHOUSE_DATABASE $CLICKHOUSE_CLIENT --query "INSERT INTO ${CLICKHOUSE_DATABASE}.table VALUES (13, 103, now())" $CLICKHOUSE_CLIENT --query "INSERT INTO ${CLICKHOUSE_DATABASE}.table VALUES (14, 104, now() - INTERVAL 1 DAY)" -# Wait when the dictionary will update the value for 13 on its own: -while [ "$(${CLICKHOUSE_CLIENT} --query "SELECT dictGetInt64('${CLICKHOUSE_DATABASE}.dict', 'y', toUInt64(13))")" = -1 ] -do - sleep 0.5 -done +if ! wait_for_dict_upate; then + echo "Dictionary had not been reloaded" >&2 + exit 1 +fi $CLICKHOUSE_CLIENT --query "SELECT '13 -> ', dictGetInt64('${CLICKHOUSE_DATABASE}.dict', 'y', toUInt64(13))" From 40cd5467c18d65a6624d273ac1a8fd9cc9257d8c Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 3 Aug 2024 09:39:33 +0200 Subject: [PATCH 641/661] tests: fix 01042_system_reload_dictionary_reloads_completely flakiness (increase lag) The test fails in case of INSERT takes > 1 sec: 2024.08.02 13:06:07.746869 [ 45445 ] {c9b55378-6bc5-46d5-80c1-5385a880f88b} executeQuery: (from [::1]:37208) (comment: 01042_system_reload_dictionary_reloads_completely.sh) CREATE DICTIONARY test_m4lx2bit.dict ( x Int64 DEFAULT -1, y Int64 DEFAULT -1, insert_time DateTime ) PRIMARY KEY x SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'table' DB 'test_m4lx2bit' UPDATE_FIELD 'insert_time')) LAYOUT(FLAT()) LIFETIME(1); (stage: Complete) ... 2024.08.02 13:06:08.263832 [ 59987 ] {744da223-67b9-4e32-b12a-eb2550a92fdb} DictionaryFactory: Created dictionary source 'ClickHouse: test_m4lx2bit.table' for dictionary '5b2b98a9-9372-47c9-bda3-830794cb96e7' 2024.08.02 13:06:08.268118 [ 59987 ] {744da223-67b9-4e32-b12a-eb2550a92fdb} executeQuery: (internal) SELECT `x`, `y`, `insert_time` FROM `test_m4lx2bit`.`table`; (stage: Complete) ... 2024.08.02 13:06:09.193190 [ 45445 ] {b6033498-4666-452f-bcf9-02ecf257ba7f} executeQuery: (from [::1]:37262) (comment: 01042_system_reload_dictionary_reloads_completely.sh) INSERT INTO test_m4lx2bit.table VALUES (stage: Complete) ... 
2024.08.02 13:06:11.342119 [ 50962 ] {} executeQuery: (internal) SELECT `x`, `y`, `insert_time` FROM `test_m4lx2bit`.`table` WHERE insert_time >= '2024-08-02 13:06:07'; (stage: Complete) ... 2024.08.02 13:06:11.832158 [ 45445 ] {b6033498-4666-452f-bcf9-02ecf257ba7f} TCPHandler: Processed in 2.642106236 sec. ... 2024.08.02 13:06:16.357448 [ 41632 ] {} executeQuery: (internal) SELECT `x`, `y`, `insert_time` FROM `test_m4lx2bit`.`table` WHERE insert_time >= '2024-08-02 13:06:10'; (stage: Complete) Signed-off-by: Azat Khuzhin --- .../01042_system_reload_dictionary_reloads_completely.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01042_system_reload_dictionary_reloads_completely.sh b/tests/queries/0_stateless/01042_system_reload_dictionary_reloads_completely.sh index ebc4110332f..453e1bb8f0a 100755 --- a/tests/queries/0_stateless/01042_system_reload_dictionary_reloads_completely.sh +++ b/tests/queries/0_stateless/01042_system_reload_dictionary_reloads_completely.sh @@ -31,7 +31,7 @@ CREATE DICTIONARY ${CLICKHOUSE_DATABASE}.dict insert_time DateTime ) PRIMARY KEY x -SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'table' DB '${CLICKHOUSE_DATABASE}' UPDATE_FIELD 'insert_time')) +SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'table' DB '${CLICKHOUSE_DATABASE}' UPDATE_FIELD 'insert_time' UPDATE_LAG 60)) LAYOUT(FLAT()) LIFETIME(1); EOF From 6ce6af0647590f4b58a6ab87ee5f29b8487e8c2f Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Sat, 3 Aug 2024 14:16:24 +0200 Subject: [PATCH 642/661] Fix completion RESTORE ON CLUSTER. --- src/Backups/RestorerFromBackup.cpp | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/src/Backups/RestorerFromBackup.cpp b/src/Backups/RestorerFromBackup.cpp index 3056f9fe421..278af9d4eb3 100644 --- a/src/Backups/RestorerFromBackup.cpp +++ b/src/Backups/RestorerFromBackup.cpp @@ -222,10 +222,19 @@ void RestorerFromBackup::setStage(const String & new_stage, const String & messa if (restore_coordination) { restore_coordination->setStage(new_stage, message); - if (new_stage == Stage::FINDING_TABLES_IN_BACKUP) - restore_coordination->waitForStage(new_stage, on_cluster_first_sync_timeout); - else - restore_coordination->waitForStage(new_stage); + + /// The initiator of a RESTORE ON CLUSTER query waits for other hosts to complete their work (see waitForStage(Stage::COMPLETED) in BackupsWorker::doRestore), + /// but other hosts shouldn't wait for each others' completion. (That's simply unnecessary and also + /// the initiator may start cleaning up (e.g. removing restore-coordination ZooKeeper nodes) once all other hosts are in Stage::COMPLETED.) 
+ bool need_wait = (new_stage != Stage::COMPLETED); + + if (need_wait) + { + if (new_stage == Stage::FINDING_TABLES_IN_BACKUP) + restore_coordination->waitForStage(new_stage, on_cluster_first_sync_timeout); + else + restore_coordination->waitForStage(new_stage); + } } } From a749223251b7e580f5d7bbcb4fc59aa6b5fffbe2 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Sat, 3 Aug 2024 22:43:12 +0800 Subject: [PATCH 643/661] change as request --- src/Functions/printf.cpp | 39 ++++++++++++++----- .../0_stateless/03203_function_printf.sql | 7 +++- 2 files changed, 36 insertions(+), 10 deletions(-) diff --git a/src/Functions/printf.cpp b/src/Functions/printf.cpp index 3efe854a53b..3cf3efaf534 100644 --- a/src/Functions/printf.cpp +++ b/src/Functions/printf.cpp @@ -6,11 +6,10 @@ #include #include #include +#include #include #include -#include -#include #include #include #include @@ -22,6 +21,7 @@ namespace ErrorCodes extern const int ILLEGAL_COLUMN; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int ILLEGAL_TYPE_OF_ARGUMENT; +extern const int BAD_ARGUMENTS; } namespace @@ -52,10 +52,9 @@ private: [[maybe_unused]] String toString() const { - std::ostringstream oss; // STYLE_CHECK_ALLOW_STD_STRING_STREAM - oss << "format:" << format << ", rows:" << rows << ", is_literal:" << is_literal << ", input:" << input.dumpStructure() - << std::endl; - return oss.str(); + WriteBufferFromOwnString buf; + buf << "format:" << format << ", rows:" << rows << ", is_literal:" << is_literal << ", input:" << input.dumpStructure() << "\n"; + return buf.str(); } private: @@ -229,9 +228,31 @@ public: ColumnsWithTypeAndName concat_args(instructions.size()); for (size_t i = 0; i < instructions.size(); ++i) { - // std::cout << "instruction[" << i << "]:" << instructions[i].toString() << std::endl; - concat_args[i] = instructions[i].execute(); - // std::cout << "concat_args[" << i << "]:" << concat_args[i].dumpStructure() << std::endl; + const auto & instruction = instructions[i]; + try + { + // std::cout << "instruction[" << i << "]:" << instructions[i].toString() << std::endl; + concat_args[i] = instruction.execute(); + // std::cout << "concat_args[" << i << "]:" << concat_args[i].dumpStructure() << std::endl; + } + catch (const fmt::v9::format_error & e) + { + if (instruction.is_literal) + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Bad format {} in function {} without input argument, reason: {}", + instruction.format, + getName(), + e.what()); + else + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Bad format {} in function {} with {} as input argument, reason: {}", + instructions[i].format, + getName(), + instruction.input.dumpStructure(), + e.what()); + } } auto res = function_concat->build(concat_args)->execute(concat_args, std::make_shared(), input_rows_count); diff --git a/tests/queries/0_stateless/03203_function_printf.sql b/tests/queries/0_stateless/03203_function_printf.sql index c41cbf0b5e9..6ff4699c8a7 100644 --- a/tests/queries/0_stateless/03203_function_printf.sql +++ b/tests/queries/0_stateless/03203_function_printf.sql @@ -31,4 +31,9 @@ select printf('%%.2e: %.2e', 123.456) = '%.2e: 1.23e+02'; select printf('%%.2g: %.2g', 123.456) = '%.2g: 1.2e+02'; -- Testing character formats with precision -select printf('%%.2s: %.2s', 'abc') = '%.2s: ab'; \ No newline at end of file +select printf('%%.2s: %.2s', 'abc') = '%.2s: ab'; + +select printf('%%X: %X', 123.123); -- { serverError BAD_ARGUMENTS } +select printf('%%A: %A', 'abc'); -- { serverError BAD_ARGUMENTS } +select 
printf('%%s: %s', 100); -- { serverError BAD_ARGUMENTS } +select printf('%%n: %n', 100); -- { serverError BAD_ARGUMENTS } From fc651cc0c61feb37e9cf104612cc0ac0cd7448e9 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 3 Aug 2024 17:57:48 +0200 Subject: [PATCH 644/661] Fix strange code in HostResolvePool --- src/Common/HostResolvePool.cpp | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/Common/HostResolvePool.cpp b/src/Common/HostResolvePool.cpp index cad64ee7204..e8a05a269bc 100644 --- a/src/Common/HostResolvePool.cpp +++ b/src/Common/HostResolvePool.cpp @@ -253,18 +253,18 @@ void HostResolver::updateImpl(Poco::Timestamp now, std::vector Date: Sat, 3 Aug 2024 18:30:33 +0200 Subject: [PATCH 645/661] Fix typo --- programs/server/Server.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 20db4c2773c..7800ee9ff00 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -849,7 +849,7 @@ try #endif #if defined(SANITIZER) - LOG_INFO(log, "Query Profiler disabled because they cannot work under sanitizers" + LOG_INFO(log, "Query Profiler is disabled because it cannot work under sanitizers" " when two different stack unwinding methods will interfere with each other."); #endif From 60648e5240fecb92344ff029d2b280f542c3a86e Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 3 Aug 2024 18:41:17 +0200 Subject: [PATCH 646/661] Revert "Add replication lag and recovery time metrics" --- src/Databases/DatabaseReplicated.cpp | 57 ++++------------- src/Databases/DatabaseReplicated.h | 10 +-- src/Databases/DatabaseReplicatedWorker.cpp | 21 ------- src/Databases/DatabaseReplicatedWorker.h | 5 -- src/Storages/System/StorageSystemClusters.cpp | 37 ++++------- src/Storages/System/StorageSystemClusters.h | 4 +- .../test_recovery_time_metric/__init__.py | 0 .../configs/config.xml | 41 ------------- .../test_recovery_time_metric/test.py | 61 ------------------- .../02117_show_create_table_system.reference | 2 - .../03206_replication_lag_metric.reference | 4 -- .../03206_replication_lag_metric.sql | 11 ---- 12 files changed, 27 insertions(+), 226 deletions(-) delete mode 100644 tests/integration/test_recovery_time_metric/__init__.py delete mode 100644 tests/integration/test_recovery_time_metric/configs/config.xml delete mode 100644 tests/integration/test_recovery_time_metric/test.py delete mode 100644 tests/queries/0_stateless/03206_replication_lag_metric.reference delete mode 100644 tests/queries/0_stateless/03206_replication_lag_metric.sql diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp index d2dee9b5994..f127ccbc224 100644 --- a/src/Databases/DatabaseReplicated.cpp +++ b/src/Databases/DatabaseReplicated.cpp @@ -12,7 +12,6 @@ #include #include #include -#include #include #include #include @@ -339,12 +338,9 @@ ClusterPtr DatabaseReplicated::getClusterImpl(bool all_groups) const return std::make_shared(getContext()->getSettingsRef(), shards, params); } -ReplicasInfo DatabaseReplicated::tryGetReplicasInfo(const ClusterPtr & cluster_) const +std::vector DatabaseReplicated::tryGetAreReplicasActive(const ClusterPtr & cluster_) const { - Strings paths_get, paths_exists; - - paths_get.emplace_back(fs::path(zookeeper_path) / "max_log_ptr"); - + Strings paths; const auto & addresses_with_failover = cluster_->getShardsAddresses(); const auto & shards_info = cluster_->getShardsInfo(); for (size_t shard_index = 0; shard_index < 
shards_info.size(); ++shard_index) @@ -352,59 +348,32 @@ ReplicasInfo DatabaseReplicated::tryGetReplicasInfo(const ClusterPtr & cluster_) for (const auto & replica : addresses_with_failover[shard_index]) { String full_name = getFullReplicaName(replica.database_shard_name, replica.database_replica_name); - paths_exists.emplace_back(fs::path(zookeeper_path) / "replicas" / full_name / "active"); - paths_get.emplace_back(fs::path(zookeeper_path) / "replicas" / full_name / "log_ptr"); + paths.emplace_back(fs::path(zookeeper_path) / "replicas" / full_name / "active"); } } try { auto current_zookeeper = getZooKeeper(); - auto get_res = current_zookeeper->get(paths_get); - auto exist_res = current_zookeeper->exists(paths_exists); - chassert(get_res.size() == exist_res.size() + 1); + auto res = current_zookeeper->exists(paths); - auto max_log_ptr_zk = get_res[0]; - if (max_log_ptr_zk.error != Coordination::Error::ZOK) - throw Coordination::Exception(max_log_ptr_zk.error); + std::vector statuses; + statuses.resize(paths.size()); - UInt32 max_log_ptr = parse(max_log_ptr_zk.data); + for (size_t i = 0; i < res.size(); ++i) + if (res[i].error == Coordination::Error::ZOK) + statuses[i] = 1; - ReplicasInfo replicas_info; - replicas_info.resize(exist_res.size()); - - size_t global_replica_index = 0; - for (size_t shard_index = 0; shard_index < shards_info.size(); ++shard_index) - { - for (const auto & replica : addresses_with_failover[shard_index]) - { - auto replica_active = exist_res[global_replica_index]; - auto replica_log_ptr = get_res[global_replica_index + 1]; - - if (replica_active.error != Coordination::Error::ZOK && replica_active.error != Coordination::Error::ZNONODE) - throw Coordination::Exception(replica_active.error); - - if (replica_log_ptr.error != Coordination::Error::ZOK) - throw Coordination::Exception(replica_log_ptr.error); - - replicas_info[global_replica_index] = ReplicaInfo{ - .is_active = replica_active.error == Coordination::Error::ZOK, - .replication_lag = max_log_ptr - parse(replica_log_ptr.data), - .recovery_time = replica.is_local ? ddl_worker->getCurrentInitializationDurationMs() : 0, - }; - - ++global_replica_index; - } - } - - return replicas_info; - } catch (...) + return statuses; + } + catch (...) 
{ tryLogCurrentException(log); return {}; } } + void DatabaseReplicated::fillClusterAuthInfo(String collection_name, const Poco::Util::AbstractConfiguration & config_ref) { const auto & config_prefix = fmt::format("named_collections.{}", collection_name); diff --git a/src/Databases/DatabaseReplicated.h b/src/Databases/DatabaseReplicated.h index 5a1570ae2e2..27ab262d1f1 100644 --- a/src/Databases/DatabaseReplicated.h +++ b/src/Databases/DatabaseReplicated.h @@ -17,14 +17,6 @@ using ZooKeeperPtr = std::shared_ptr; class Cluster; using ClusterPtr = std::shared_ptr; -struct ReplicaInfo -{ - bool is_active; - UInt32 replication_lag; - UInt64 recovery_time; -}; -using ReplicasInfo = std::vector; - class DatabaseReplicated : public DatabaseAtomic { public: @@ -92,7 +84,7 @@ public: static void dropReplica(DatabaseReplicated * database, const String & database_zookeeper_path, const String & shard, const String & replica, bool throw_if_noop); - ReplicasInfo tryGetReplicasInfo(const ClusterPtr & cluster_) const; + std::vector tryGetAreReplicasActive(const ClusterPtr & cluster_) const; void renameDatabase(ContextPtr query_context, const String & new_name) override; diff --git a/src/Databases/DatabaseReplicatedWorker.cpp b/src/Databases/DatabaseReplicatedWorker.cpp index 4e7408aa96e..1ef88dc03bc 100644 --- a/src/Databases/DatabaseReplicatedWorker.cpp +++ b/src/Databases/DatabaseReplicatedWorker.cpp @@ -32,12 +32,6 @@ DatabaseReplicatedDDLWorker::DatabaseReplicatedDDLWorker(DatabaseReplicated * db bool DatabaseReplicatedDDLWorker::initializeMainThread() { - { - std::lock_guard lock(initialization_duration_timer_mutex); - initialization_duration_timer.emplace(); - initialization_duration_timer->start(); - } - while (!stop_flag) { try @@ -75,10 +69,6 @@ bool DatabaseReplicatedDDLWorker::initializeMainThread() initializeReplication(); initialized = true; - { - std::lock_guard lock(initialization_duration_timer_mutex); - initialization_duration_timer.reset(); - } return true; } catch (...) @@ -88,11 +78,6 @@ bool DatabaseReplicatedDDLWorker::initializeMainThread() } } - { - std::lock_guard lock(initialization_duration_timer_mutex); - initialization_duration_timer.reset(); - } - return false; } @@ -474,10 +459,4 @@ UInt32 DatabaseReplicatedDDLWorker::getLogPointer() const return max_id.load(); } -UInt64 DatabaseReplicatedDDLWorker::getCurrentInitializationDurationMs() const -{ - std::lock_guard lock(initialization_duration_timer_mutex); - return initialization_duration_timer ? 
initialization_duration_timer->elapsedMilliseconds() : 0; -} - } diff --git a/src/Databases/DatabaseReplicatedWorker.h b/src/Databases/DatabaseReplicatedWorker.h index 2309c831839..41edf2221b8 100644 --- a/src/Databases/DatabaseReplicatedWorker.h +++ b/src/Databases/DatabaseReplicatedWorker.h @@ -36,8 +36,6 @@ public: DatabaseReplicated * const database, bool committed = false); /// NOLINT UInt32 getLogPointer() const; - - UInt64 getCurrentInitializationDurationMs() const; private: bool initializeMainThread() override; void initializeReplication(); @@ -58,9 +56,6 @@ private: ZooKeeperPtr active_node_holder_zookeeper; /// It will remove "active" node when database is detached zkutil::EphemeralNodeHolderPtr active_node_holder; - - std::optional initialization_duration_timer; - mutable std::mutex initialization_duration_timer_mutex; }; } diff --git a/src/Storages/System/StorageSystemClusters.cpp b/src/Storages/System/StorageSystemClusters.cpp index d03b600b6ef..160c8d6270e 100644 --- a/src/Storages/System/StorageSystemClusters.cpp +++ b/src/Storages/System/StorageSystemClusters.cpp @@ -31,8 +31,6 @@ ColumnsDescription StorageSystemClusters::getColumnsDescription() {"database_shard_name", std::make_shared(), "The name of the `Replicated` database shard (for clusters that belong to a `Replicated` database)."}, {"database_replica_name", std::make_shared(), "The name of the `Replicated` database replica (for clusters that belong to a `Replicated` database)."}, {"is_active", std::make_shared(std::make_shared()), "The status of the Replicated database replica (for clusters that belong to a Replicated database): 1 means 'replica is online', 0 means 'replica is offline', NULL means 'unknown'."}, - {"replication_lag", std::make_shared(std::make_shared()), "The replication lag of the `Replicated` database replica (for clusters that belong to a Replicated database)."}, - {"recovery_time", std::make_shared(std::make_shared()), "The recovery time of the `Replicated` database replica (for clusters that belong to a Replicated database), in milliseconds."}, }; description.setAliases({ @@ -48,30 +46,31 @@ void StorageSystemClusters::fillData(MutableColumns & res_columns, ContextPtr co writeCluster(res_columns, name_and_cluster, {}); const auto databases = DatabaseCatalog::instance().getDatabases(); - for (const auto & [database_name, database] : databases) + for (const auto & name_and_database : databases) { - if (const auto * replicated = typeid_cast(database.get())) + if (const auto * replicated = typeid_cast(name_and_database.second.get())) { + if (auto database_cluster = replicated->tryGetCluster()) - writeCluster(res_columns, {database_name, database_cluster}, - replicated->tryGetReplicasInfo(database_cluster)); + writeCluster(res_columns, {name_and_database.first, database_cluster}, + replicated->tryGetAreReplicasActive(database_cluster)); if (auto database_cluster = replicated->tryGetAllGroupsCluster()) - writeCluster(res_columns, {DatabaseReplicated::ALL_GROUPS_CLUSTER_PREFIX + database_name, database_cluster}, - replicated->tryGetReplicasInfo(database_cluster)); + writeCluster(res_columns, {DatabaseReplicated::ALL_GROUPS_CLUSTER_PREFIX + name_and_database.first, database_cluster}, + replicated->tryGetAreReplicasActive(database_cluster)); } } } void StorageSystemClusters::writeCluster(MutableColumns & res_columns, const NameAndCluster & name_and_cluster, - const ReplicasInfo & replicas_info) + const std::vector & is_active) { const String & cluster_name = name_and_cluster.first; const ClusterPtr & 
cluster = name_and_cluster.second; const auto & shards_info = cluster->getShardsInfo(); const auto & addresses_with_failover = cluster->getShardsAddresses(); - size_t global_replica_idx = 0; + size_t replica_idx = 0; for (size_t shard_index = 0; shard_index < shards_info.size(); ++shard_index) { const auto & shard_info = shards_info[shard_index]; @@ -100,24 +99,10 @@ void StorageSystemClusters::writeCluster(MutableColumns & res_columns, const Nam res_columns[i++]->insert(pool_status[replica_index].estimated_recovery_time.count()); res_columns[i++]->insert(address.database_shard_name); res_columns[i++]->insert(address.database_replica_name); - if (replicas_info.empty()) - { + if (is_active.empty()) res_columns[i++]->insertDefault(); - res_columns[i++]->insertDefault(); - res_columns[i++]->insertDefault(); - } else - { - const auto & replica_info = replicas_info[global_replica_idx]; - res_columns[i++]->insert(replica_info.is_active); - res_columns[i++]->insert(replica_info.replication_lag); - if (replica_info.recovery_time != 0) - res_columns[i++]->insert(replica_info.recovery_time); - else - res_columns[i++]->insertDefault(); - } - - ++global_replica_idx; + res_columns[i++]->insert(is_active[replica_idx++]); } } } diff --git a/src/Storages/System/StorageSystemClusters.h b/src/Storages/System/StorageSystemClusters.h index f6e08734896..0f7c792261d 100644 --- a/src/Storages/System/StorageSystemClusters.h +++ b/src/Storages/System/StorageSystemClusters.h @@ -1,10 +1,10 @@ #pragma once -#include #include #include #include + namespace DB { @@ -27,7 +27,7 @@ protected: using NameAndCluster = std::pair>; void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const override; - static void writeCluster(MutableColumns & res_columns, const NameAndCluster & name_and_cluster, const ReplicasInfo & replicas_info); + static void writeCluster(MutableColumns & res_columns, const NameAndCluster & name_and_cluster, const std::vector & is_active); }; } diff --git a/tests/integration/test_recovery_time_metric/__init__.py b/tests/integration/test_recovery_time_metric/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/tests/integration/test_recovery_time_metric/configs/config.xml b/tests/integration/test_recovery_time_metric/configs/config.xml deleted file mode 100644 index bad9b1fa9ea..00000000000 --- a/tests/integration/test_recovery_time_metric/configs/config.xml +++ /dev/null @@ -1,41 +0,0 @@ - - 9000 - - - - - - - - - default - - - - - - 2181 - 1 - /var/lib/clickhouse/coordination/log - /var/lib/clickhouse/coordination/snapshots - - 20000 - - - - 1 - localhost - 9444 - - - - - - - localhost - 2181 - - 20000 - - - diff --git a/tests/integration/test_recovery_time_metric/test.py b/tests/integration/test_recovery_time_metric/test.py deleted file mode 100644 index 6fcf2fad423..00000000000 --- a/tests/integration/test_recovery_time_metric/test.py +++ /dev/null @@ -1,61 +0,0 @@ -import pytest -from helpers.cluster import ClickHouseCluster - -cluster = ClickHouseCluster(__file__) -node = cluster.add_instance( - "node", - main_configs=["configs/config.xml"], - stay_alive=True, -) - - -@pytest.fixture(scope="module") -def start_cluster(): - try: - cluster.start() - yield cluster - finally: - cluster.shutdown() - - -def test_recovery_time_metric(start_cluster): - node.query( - """ - DROP DATABASE IF EXISTS rdb; - CREATE DATABASE rdb - ENGINE = Replicated('/test/test_recovery_time_metric', 'shard1', 'replica1') - """ - ) - - node.query( - """ - 
DROP TABLE IF EXISTS rdb.t; - CREATE TABLE rdb.t - ( - `x` UInt32 - ) - ENGINE = MergeTree - ORDER BY x - """ - ) - - node.exec_in_container(["bash", "-c", "rm /var/lib/clickhouse/metadata/rdb/t.sql"]) - - node.restart_clickhouse() - - ret = int( - node.query( - """ - SELECT recovery_time - FROM system.clusters - WHERE cluster = 'rdb' - """ - ).strip() - ) - assert ret > 0 - - node.query( - """ - DROP DATABASE rdb - """ - ) diff --git a/tests/queries/0_stateless/02117_show_create_table_system.reference b/tests/queries/0_stateless/02117_show_create_table_system.reference index 32e8b2f4312..cfae4fee6c2 100644 --- a/tests/queries/0_stateless/02117_show_create_table_system.reference +++ b/tests/queries/0_stateless/02117_show_create_table_system.reference @@ -52,8 +52,6 @@ CREATE TABLE system.clusters `database_shard_name` String, `database_replica_name` String, `is_active` Nullable(UInt8), - `replication_lag` Nullable(UInt32), - `recovery_time` Nullable(UInt64), `name` String ALIAS cluster ) ENGINE = SystemClusters diff --git a/tests/queries/0_stateless/03206_replication_lag_metric.reference b/tests/queries/0_stateless/03206_replication_lag_metric.reference deleted file mode 100644 index 02f4a7264b1..00000000000 --- a/tests/queries/0_stateless/03206_replication_lag_metric.reference +++ /dev/null @@ -1,4 +0,0 @@ -0 -2 -0 -2 diff --git a/tests/queries/0_stateless/03206_replication_lag_metric.sql b/tests/queries/0_stateless/03206_replication_lag_metric.sql deleted file mode 100644 index 998c332a11c..00000000000 --- a/tests/queries/0_stateless/03206_replication_lag_metric.sql +++ /dev/null @@ -1,11 +0,0 @@ --- Tags: no-parallel - -CREATE DATABASE rdb1 ENGINE = Replicated('/test/test_replication_lag_metric', 'shard1', 'replica1'); -CREATE DATABASE rdb2 ENGINE = Replicated('/test/test_replication_lag_metric', 'shard1', 'replica2'); - -SET distributed_ddl_task_timeout = 0; -CREATE TABLE rdb1.t (id UInt32) ENGINE = ReplicatedMergeTree ORDER BY id; -SELECT replication_lag FROM system.clusters WHERE cluster IN ('rdb1', 'rdb2') ORDER BY cluster ASC, replica_num ASC; - -DROP DATABASE rdb1; -DROP DATABASE rdb2; From a19750234153e760907f3c7bc040f949100534df Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 3 Aug 2024 23:11:20 +0200 Subject: [PATCH 647/661] Fix test retries Should fix issues like: - 02494_zero_copy_projection_cancel_fetch - https://s3.amazonaws.com/clickhouse-test-reports/67719/40cd5467c18d65a6624d273ac1a8fd9cc9257d8c/stateless_tests__tsan__s3_storage__[4_4].html Signed-off-by: Azat Khuzhin --- tests/clickhouse-test | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index a29c786e998..877548e577e 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -2218,7 +2218,6 @@ def run_tests_array(all_tests_with_params: Tuple[List[str], int, TestSuite, bool args, test_suite, client_options, server_logs_level ) test_result = test_case.process_result(test_result, MESSAGES) - break except TimeoutError: break finally: From f06ae2f5518ff8cb610b337d4900fd6f0088190f Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 3 Aug 2024 23:27:19 +0200 Subject: [PATCH 648/661] Fill only selected columns from system.clusters Some of them pretty heavy, i.e. is_active for ReplicatedDatabase This should fix 02903_rmt_retriable_merge_exception flakiness [1]. 
[1]: https://s3.amazonaws.com/clickhouse-test-reports/67687/89c47df559ba23d988f8af3c342e0c8d5531f4b8/fast_test.html Signed-off-by: Azat Khuzhin --- src/Storages/System/StorageSystemClusters.cpp | 82 ++++++++++++------- src/Storages/System/StorageSystemClusters.h | 6 +- 2 files changed, 56 insertions(+), 32 deletions(-) diff --git a/src/Storages/System/StorageSystemClusters.cpp b/src/Storages/System/StorageSystemClusters.cpp index 160c8d6270e..9c5c07ae49f 100644 --- a/src/Storages/System/StorageSystemClusters.cpp +++ b/src/Storages/System/StorageSystemClusters.cpp @@ -40,10 +40,10 @@ ColumnsDescription StorageSystemClusters::getColumnsDescription() return description; } -void StorageSystemClusters::fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const +void StorageSystemClusters::fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector columns_mask) const { for (const auto & name_and_cluster : context->getClusters()) - writeCluster(res_columns, name_and_cluster, {}); + writeCluster(res_columns, columns_mask, name_and_cluster, /* replicated= */ nullptr); const auto databases = DatabaseCatalog::instance().getDatabases(); for (const auto & name_and_database : databases) @@ -52,18 +52,15 @@ void StorageSystemClusters::fillData(MutableColumns & res_columns, ContextPtr co { if (auto database_cluster = replicated->tryGetCluster()) - writeCluster(res_columns, {name_and_database.first, database_cluster}, - replicated->tryGetAreReplicasActive(database_cluster)); + writeCluster(res_columns, columns_mask, {name_and_database.first, database_cluster}, replicated); if (auto database_cluster = replicated->tryGetAllGroupsCluster()) - writeCluster(res_columns, {DatabaseReplicated::ALL_GROUPS_CLUSTER_PREFIX + name_and_database.first, database_cluster}, - replicated->tryGetAreReplicasActive(database_cluster)); + writeCluster(res_columns, columns_mask, {DatabaseReplicated::ALL_GROUPS_CLUSTER_PREFIX + name_and_database.first, database_cluster}, replicated); } } } -void StorageSystemClusters::writeCluster(MutableColumns & res_columns, const NameAndCluster & name_and_cluster, - const std::vector & is_active) +void StorageSystemClusters::writeCluster(MutableColumns & res_columns, const std::vector & columns_mask, const NameAndCluster & name_and_cluster, const DatabaseReplicated * replicated) { const String & cluster_name = name_and_cluster.first; const ClusterPtr & cluster = name_and_cluster.second; @@ -79,30 +76,55 @@ void StorageSystemClusters::writeCluster(MutableColumns & res_columns, const Nam for (size_t replica_index = 0; replica_index < shard_addresses.size(); ++replica_index) { - size_t i = 0; + size_t src_index = 0, res_index = 0; const auto & address = shard_addresses[replica_index]; - res_columns[i++]->insert(cluster_name); - res_columns[i++]->insert(shard_info.shard_num); - res_columns[i++]->insert(shard_info.weight); - res_columns[i++]->insert(shard_info.has_internal_replication); - res_columns[i++]->insert(replica_index + 1); - res_columns[i++]->insert(address.host_name); - auto resolved = address.getResolvedAddress(); - res_columns[i++]->insert(resolved ? 
resolved->host().toString() : String()); - res_columns[i++]->insert(address.port); - res_columns[i++]->insert(address.is_local); - res_columns[i++]->insert(address.user); - res_columns[i++]->insert(address.default_database); - res_columns[i++]->insert(pool_status[replica_index].error_count); - res_columns[i++]->insert(pool_status[replica_index].slowdown_count); - res_columns[i++]->insert(pool_status[replica_index].estimated_recovery_time.count()); - res_columns[i++]->insert(address.database_shard_name); - res_columns[i++]->insert(address.database_replica_name); - if (is_active.empty()) - res_columns[i++]->insertDefault(); - else - res_columns[i++]->insert(is_active[replica_idx++]); + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(cluster_name); + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(shard_info.shard_num); + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(shard_info.weight); + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(shard_info.has_internal_replication); + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(replica_index + 1); + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(address.host_name); + if (columns_mask[src_index++]) + { + auto resolved = address.getResolvedAddress(); + res_columns[res_index++]->insert(resolved ? resolved->host().toString() : String()); + } + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(address.port); + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(address.is_local); + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(address.user); + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(address.default_database); + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(pool_status[replica_index].error_count); + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(pool_status[replica_index].slowdown_count); + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(pool_status[replica_index].estimated_recovery_time.count()); + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(address.database_shard_name); + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(address.database_replica_name); + if (columns_mask[src_index++]) + { + std::vector is_active; + if (replicated) + is_active = replicated->tryGetAreReplicasActive(name_and_cluster.second); + + if (is_active.empty()) + res_columns[res_index++]->insertDefault(); + else + res_columns[res_index++]->insert(is_active[replica_idx++]); + } } } } diff --git a/src/Storages/System/StorageSystemClusters.h b/src/Storages/System/StorageSystemClusters.h index 0f7c792261d..f6adb902f43 100644 --- a/src/Storages/System/StorageSystemClusters.h +++ b/src/Storages/System/StorageSystemClusters.h @@ -10,6 +10,7 @@ namespace DB class Context; class Cluster; +class DatabaseReplicated; /** Implements system table 'clusters' * that allows to obtain information about available clusters @@ -26,8 +27,9 @@ protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; using NameAndCluster = std::pair>; - void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const override; - static void writeCluster(MutableColumns & res_columns, const NameAndCluster & name_and_cluster, const std::vector & is_active); + void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector columns_mask) const override; + static 
void writeCluster(MutableColumns & res_columns, const std::vector & columns_mask, const NameAndCluster & name_and_cluster, const DatabaseReplicated * replicated); + bool supportsColumnsMask() const override { return true; } }; } From 9d0e066cda8d0ccb6bd4f9e07fee36a2bfae707a Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 3 Aug 2024 23:41:10 +0200 Subject: [PATCH 649/661] Bump NuRaft (to properly catch thread exceptions) Refs: https://github.com/ClickHouse/NuRaft/pull/75 Refs: https://github.com/eBay/NuRaft/pull/525 Signed-off-by: Azat Khuzhin --- contrib/NuRaft | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/NuRaft b/contrib/NuRaft index cb5dc3c906e..c2b0811f164 160000 --- a/contrib/NuRaft +++ b/contrib/NuRaft @@ -1 +1 @@ -Subproject commit cb5dc3c906e80f253e9ce9535807caef827cc2e0 +Subproject commit c2b0811f164a7948208489562dab4f186eb305ce From 8562a6106c286882f26383086e52e399106893be Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 3 Aug 2024 23:45:18 +0200 Subject: [PATCH 650/661] Better safety thresholds in `arrayWithConstant` --- src/Functions/array/arrayWithConstant.cpp | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/src/Functions/array/arrayWithConstant.cpp b/src/Functions/array/arrayWithConstant.cpp index 48262870553..4cbc6404b9b 100644 --- a/src/Functions/array/arrayWithConstant.cpp +++ b/src/Functions/array/arrayWithConstant.cpp @@ -1,9 +1,9 @@ #include -#include #include #include #include #include +#include namespace DB @@ -15,7 +15,8 @@ namespace ErrorCodes extern const int TOO_LARGE_ARRAY_SIZE; } -/// Reasonable threshold. +/// Reasonable thresholds. +static constexpr Int64 max_array_size_in_columns_bytes = 1000000000; static constexpr size_t max_arrays_size_in_columns = 1000000000; @@ -63,12 +64,19 @@ public: auto array_size = col_num->getInt(i); if (unlikely(array_size < 0)) - throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE, "Array size cannot be negative: while executing function {}", getName()); + throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE, "Array size {} cannot be negative: while executing function {}", array_size, getName()); + + Int64 estimated_size = 0; + if (unlikely(common::mulOverflow(array_size, col_value->byteSize(), estimated_size))) + throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE, "Array size {} with element size {} bytes is too large: while executing function {}", array_size, col_value->byteSize(), getName()); + + if (unlikely(estimated_size > max_array_size_in_columns_bytes)) + throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE, "Array size {} with element size {} bytes is too large: while executing function {}", array_size, col_value->byteSize(), getName()); offset += array_size; if (unlikely(offset > max_arrays_size_in_columns)) - throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE, "Too large array size while executing function {}", getName()); + throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE, "Too large array size {} (will generate at least {} elements) while executing function {}", array_size, offset, getName()); offsets.push_back(offset); } From 7a066a6505108b14bd49da8766c1bc473a978b1f Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 3 Aug 2024 23:48:51 +0200 Subject: [PATCH 651/661] Add a test --- .../0_stateless/03216_arrayWithConstant_limits.reference | 1 + tests/queries/0_stateless/03216_arrayWithConstant_limits.sql | 3 +++ 2 files changed, 4 insertions(+) create mode 100644 tests/queries/0_stateless/03216_arrayWithConstant_limits.reference create mode 
100644 tests/queries/0_stateless/03216_arrayWithConstant_limits.sql diff --git a/tests/queries/0_stateless/03216_arrayWithConstant_limits.reference b/tests/queries/0_stateless/03216_arrayWithConstant_limits.reference new file mode 100644 index 00000000000..825319e1c5b --- /dev/null +++ b/tests/queries/0_stateless/03216_arrayWithConstant_limits.reference @@ -0,0 +1 @@ +10000000 diff --git a/tests/queries/0_stateless/03216_arrayWithConstant_limits.sql b/tests/queries/0_stateless/03216_arrayWithConstant_limits.sql new file mode 100644 index 00000000000..c46524c50e6 --- /dev/null +++ b/tests/queries/0_stateless/03216_arrayWithConstant_limits.sql @@ -0,0 +1,3 @@ +SELECT arrayWithConstant(96142475, ['qMUF']); -- { serverError TOO_LARGE_ARRAY_SIZE } +SELECT arrayWithConstant(100000000, materialize([[[[[[[[[['Hello, world!']]]]]]]]]])); -- { serverError TOO_LARGE_ARRAY_SIZE } +SELECT length(arrayWithConstant(10000000, materialize([[[[[[[[[['Hello world']]]]]]]]]]))); From 185b6a54da8f24a97f130bdebe7bb1ec2bd266c0 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 4 Aug 2024 00:34:58 +0200 Subject: [PATCH 652/661] Merge with master --- src/Core/SettingsChangesHistory.cpp | 261 +--------------------------- 1 file changed, 1 insertion(+), 260 deletions(-) diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index 3f07bfdb933..5b94391bade 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -57,266 +57,6 @@ String ClickHouseVersion::toString() const /// Note: please check if the key already exists to prevent duplicate entries. static std::initializer_list> settings_changes_history_initializer = { - {"24.8", {{"input_format_json_max_depth", 1000000, 1000, "It was unlimited in previous versions, but that was unsafe."}}}, - {"24.7", {{"output_format_parquet_write_page_index", false, true, "Add a possibility to write page index into parquet files."}, - {"output_format_binary_encode_types_in_binary_format", false, false, "Added new setting to allow to write type names in binary format in RowBinaryWithNamesAndTypes output format"}, - {"input_format_binary_decode_types_in_binary_format", false, false, "Added new setting to allow to read type names in binary format in RowBinaryWithNamesAndTypes input format"}, - {"output_format_native_encode_types_in_binary_format", false, false, "Added new setting to allow to write type names in binary format in Native output format"}, - {"input_format_native_decode_types_in_binary_format", false, false, "Added new setting to allow to read type names in binary format in Native output format"}, - {"read_in_order_use_buffering", false, true, "Use buffering before merging while reading in order of primary key"}, - {"enable_named_columns_in_function_tuple", false, true, "Generate named tuples in function tuple() when all names are unique and can be treated as unquoted identifiers."}, - {"input_format_json_case_insensitive_column_matching", false, false, "Ignore case when matching JSON keys with CH columns."}, - {"optimize_trivial_insert_select", true, false, "The optimization does not make sense in many cases."}, - {"dictionary_validate_primary_key_type", false, false, "Validate primary key type for dictionaries. 
By default id type for simple layouts will be implicitly converted to UInt64."}, - {"collect_hash_table_stats_during_joins", false, true, "New setting."}, - {"max_size_to_preallocate_for_joins", 0, 100'000'000, "New setting."}, - {"input_format_orc_reader_time_zone_name", "GMT", "GMT", "The time zone name for ORC row reader, the default ORC row reader's time zone is GMT."}, - {"lightweight_mutation_projection_mode", "throw", "throw", "When lightweight delete happens on a table with projection(s), the possible operations include throw the exception as projection exists, or drop all projection related to this table then do lightweight delete."}, - {"database_replicated_allow_heavy_create", true, false, "Long-running DDL queries (CREATE AS SELECT and POPULATE) for Replicated database engine was forbidden"}, - {"query_plan_merge_filters", false, false, "Allow to merge filters in the query plan"}, - {"azure_sdk_max_retries", 10, 10, "Maximum number of retries in azure sdk"}, - {"azure_sdk_retry_initial_backoff_ms", 10, 10, "Minimal backoff between retries in azure sdk"}, - {"azure_sdk_retry_max_backoff_ms", 1000, 1000, "Maximal backoff between retries in azure sdk"}, - {"merge_tree_min_bytes_per_task_for_remote_reading", 4194304, 2097152, "Value is unified with `filesystem_prefetch_min_bytes_for_single_read_task`"}, - {"ignore_on_cluster_for_replicated_named_collections_queries", false, false, "Ignore ON CLUSTER clause for replicated named collections management queries."}, - {"backup_restore_s3_retry_attempts", 1000,1000, "Setting for Aws::Client::RetryStrategy, Aws::Client does retries itself, 0 means no retries. It takes place only for backup/restore."}, - {"postgresql_connection_attempt_timeout", 2, 2, "Allow to control 'connect_timeout' parameter of PostgreSQL connection."}, - {"postgresql_connection_pool_retries", 2, 2, "Allow to control the number of retries in PostgreSQL connection pool."} - }}, - {"24.6", {{"materialize_skip_indexes_on_insert", true, true, "Added new setting to allow to disable materialization of skip indexes on insert"}, - {"materialize_statistics_on_insert", true, true, "Added new setting to allow to disable materialization of statistics on insert"}, - {"input_format_parquet_use_native_reader", false, false, "When reading Parquet files, to use native reader instead of arrow reader."}, - {"hdfs_throw_on_zero_files_match", false, false, "Allow to throw an error when ListObjects request cannot match any files in HDFS engine instead of empty query result"}, - {"azure_throw_on_zero_files_match", false, false, "Allow to throw an error when ListObjects request cannot match any files in AzureBlobStorage engine instead of empty query result"}, - {"s3_validate_request_settings", true, true, "Allow to disable S3 request settings validation"}, - {"allow_experimental_full_text_index", false, false, "Enable experimental full-text index"}, - {"azure_skip_empty_files", false, false, "Allow to skip empty files in azure table engine"}, - {"hdfs_ignore_file_doesnt_exist", false, false, "Allow to return 0 rows when the requested files don't exist instead of throwing an exception in HDFS table engine"}, - {"azure_ignore_file_doesnt_exist", false, false, "Allow to return 0 rows when the requested files don't exist instead of throwing an exception in AzureBlobStorage table engine"}, - {"s3_ignore_file_doesnt_exist", false, false, "Allow to return 0 rows when the requested files don't exist instead of throwing an exception in S3 table engine"}, - {"s3_max_part_number", 10000, 10000, 
"Maximum part number number for s3 upload part"}, - {"s3_max_single_operation_copy_size", 32 * 1024 * 1024, 32 * 1024 * 1024, "Maximum size for a single copy operation in s3"}, - {"input_format_parquet_max_block_size", 8192, DEFAULT_BLOCK_SIZE, "Increase block size for parquet reader."}, - {"input_format_parquet_prefer_block_bytes", 0, DEFAULT_BLOCK_SIZE * 256, "Average block bytes output by parquet reader."}, - {"enable_blob_storage_log", true, true, "Write information about blob storage operations to system.blob_storage_log table"}, - {"allow_deprecated_snowflake_conversion_functions", true, false, "Disabled deprecated functions snowflakeToDateTime[64] and dateTime[64]ToSnowflake."}, - {"allow_statistic_optimize", false, false, "Old setting which popped up here being renamed."}, - {"allow_experimental_statistic", false, false, "Old setting which popped up here being renamed."}, - {"allow_statistics_optimize", false, false, "The setting was renamed. The previous name is `allow_statistic_optimize`."}, - {"allow_experimental_statistics", false, false, "The setting was renamed. The previous name is `allow_experimental_statistic`."}, - {"enable_vertical_final", false, true, "Enable vertical final by default again after fixing bug"}, - {"parallel_replicas_custom_key_range_lower", 0, 0, "Add settings to control the range filter when using parallel replicas with dynamic shards"}, - {"parallel_replicas_custom_key_range_upper", 0, 0, "Add settings to control the range filter when using parallel replicas with dynamic shards. A value of 0 disables the upper limit"}, - {"output_format_pretty_display_footer_column_names", 0, 1, "Add a setting to display column names in the footer if there are many rows. Threshold value is controlled by output_format_pretty_display_footer_column_names_min_rows."}, - {"output_format_pretty_display_footer_column_names_min_rows", 0, 50, "Add a setting to control the threshold value for setting output_format_pretty_display_footer_column_names_min_rows. Default 50."}, - {"output_format_csv_serialize_tuple_into_separate_columns", true, true, "A new way of how interpret tuples in CSV format was added."}, - {"input_format_csv_deserialize_separate_columns_into_tuple", true, true, "A new way of how interpret tuples in CSV format was added."}, - {"input_format_csv_try_infer_strings_from_quoted_tuples", true, true, "A new way of how interpret tuples in CSV format was added."}, - }}, - {"24.5", {{"allow_deprecated_error_prone_window_functions", true, false, "Allow usage of deprecated error prone window functions (neighbor, runningAccumulate, runningDifferenceStartingWithFirstValue, runningDifference)"}, - {"allow_experimental_join_condition", false, false, "Support join with inequal conditions which involve columns from both left and right table. e.g. t1.y < t2.y."}, - {"input_format_tsv_crlf_end_of_line", false, false, "Enables reading of CRLF line endings with TSV formats"}, - {"output_format_parquet_use_custom_encoder", false, true, "Enable custom Parquet encoder."}, - {"cross_join_min_rows_to_compress", 0, 10000000, "Minimal count of rows to compress block in CROSS JOIN. Zero value means - disable this threshold. This block is compressed when any of the two thresholds (by rows or by bytes) are reached."}, - {"cross_join_min_bytes_to_compress", 0, 1_GiB, "Minimal size of block to compress in CROSS JOIN. Zero value means - disable this threshold. 
This block is compressed when any of the two thresholds (by rows or by bytes) are reached."}, - {"http_max_chunk_size", 0, 0, "Internal limitation"}, - {"prefer_external_sort_block_bytes", 0, DEFAULT_BLOCK_SIZE * 256, "Prefer maximum block bytes for external sort, reduce the memory usage during merging."}, - {"input_format_force_null_for_omitted_fields", false, false, "Disable type-defaults for omitted fields when needed"}, - {"cast_string_to_dynamic_use_inference", false, false, "Add setting to allow converting String to Dynamic through parsing"}, - {"allow_experimental_dynamic_type", false, false, "Add new experimental Dynamic type"}, - {"azure_max_blocks_in_multipart_upload", 50000, 50000, "Maximum number of blocks in multipart upload for Azure."}, - }}, - {"24.4", {{"input_format_json_throw_on_bad_escape_sequence", true, true, "Allow to save JSON strings with bad escape sequences"}, - {"max_parsing_threads", 0, 0, "Add a separate setting to control number of threads in parallel parsing from files"}, - {"ignore_drop_queries_probability", 0, 0, "Allow to ignore drop queries in server with specified probability for testing purposes"}, - {"lightweight_deletes_sync", 2, 2, "The same as 'mutation_sync', but controls only execution of lightweight deletes"}, - {"query_cache_system_table_handling", "save", "throw", "The query cache no longer caches results of queries against system tables"}, - {"input_format_json_ignore_unnecessary_fields", false, true, "Ignore unnecessary fields and not parse them. Enabling this may not throw exceptions on json strings of invalid format or with duplicated fields"}, - {"input_format_hive_text_allow_variable_number_of_columns", false, true, "Ignore extra columns in Hive Text input (if file has more columns than expected) and treat missing fields in Hive Text input as default values."}, - {"allow_experimental_database_replicated", false, true, "Database engine Replicated is now in Beta stage"}, - {"temporary_data_in_cache_reserve_space_wait_lock_timeout_milliseconds", (10 * 60 * 1000), (10 * 60 * 1000), "Wait time to lock cache for sapce reservation in temporary data in filesystem cache"}, - {"optimize_rewrite_sum_if_to_count_if", false, true, "Only available for the analyzer, where it works correctly"}, - {"azure_allow_parallel_part_upload", "true", "true", "Use multiple threads for azure multipart upload."}, - {"max_recursive_cte_evaluation_depth", DBMS_RECURSIVE_CTE_MAX_EVALUATION_DEPTH, DBMS_RECURSIVE_CTE_MAX_EVALUATION_DEPTH, "Maximum limit on recursive CTE evaluation depth"}, - {"query_plan_convert_outer_join_to_inner_join", false, true, "Allow to convert OUTER JOIN to INNER JOIN if filter after JOIN always filters default values"}, - }}, - {"24.3", {{"s3_connect_timeout_ms", 1000, 1000, "Introduce new dedicated setting for s3 connection timeout"}, - {"allow_experimental_shared_merge_tree", false, true, "The setting is obsolete"}, - {"use_page_cache_for_disks_without_file_cache", false, false, "Added userspace page cache"}, - {"read_from_page_cache_if_exists_otherwise_bypass_cache", false, false, "Added userspace page cache"}, - {"page_cache_inject_eviction", false, false, "Added userspace page cache"}, - {"default_table_engine", "None", "MergeTree", "Set default table engine to MergeTree for better usability"}, - {"input_format_json_use_string_type_for_ambiguous_paths_in_named_tuples_inference_from_objects", false, false, "Allow to use String type for ambiguous paths during named tuple inference from JSON objects"}, - {"traverse_shadow_remote_data_paths", 
false, false, "Traverse shadow directory when query system.remote_data_paths."}, - {"throw_if_deduplication_in_dependent_materialized_views_enabled_with_async_insert", false, true, "Deduplication in dependent materialized view cannot work together with async inserts."}, - {"parallel_replicas_allow_in_with_subquery", false, true, "If true, subquery for IN will be executed on every follower replica"}, - {"log_processors_profiles", false, true, "Enable by default"}, - {"function_locate_has_mysql_compatible_argument_order", false, true, "Increase compatibility with MySQL's locate function."}, - {"allow_suspicious_primary_key", true, false, "Forbid suspicious PRIMARY KEY/ORDER BY for MergeTree (i.e. SimpleAggregateFunction)"}, - {"filesystem_cache_reserve_space_wait_lock_timeout_milliseconds", 1000, 1000, "Wait time to lock cache for sapce reservation in filesystem cache"}, - {"max_parser_backtracks", 0, 1000000, "Limiting the complexity of parsing"}, - {"analyzer_compatibility_join_using_top_level_identifier", false, false, "Force to resolve identifier in JOIN USING from projection"}, - {"distributed_insert_skip_read_only_replicas", false, false, "If true, INSERT into Distributed will skip read-only replicas"}, - {"keeper_max_retries", 10, 10, "Max retries for general keeper operations"}, - {"keeper_retry_initial_backoff_ms", 100, 100, "Initial backoff timeout for general keeper operations"}, - {"keeper_retry_max_backoff_ms", 5000, 5000, "Max backoff timeout for general keeper operations"}, - {"s3queue_allow_experimental_sharded_mode", false, false, "Enable experimental sharded mode of S3Queue table engine. It is experimental because it will be rewritten"}, - {"allow_experimental_analyzer", false, true, "Enable analyzer and planner by default."}, - {"merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability", 0.0, 0.0, "For testing of `PartsSplitter` - split read ranges into intersecting and non intersecting every time you read from MergeTree with the specified probability."}, - {"allow_get_client_http_header", false, false, "Introduced a new function."}, - {"output_format_pretty_row_numbers", false, true, "It is better for usability."}, - {"output_format_pretty_max_value_width_apply_for_single_value", true, false, "Single values in Pretty formats won't be cut."}, - {"output_format_parquet_string_as_string", false, true, "ClickHouse allows arbitrary binary data in the String data type, which is typically UTF-8. Parquet/ORC/Arrow Strings only support UTF-8. That's why you can choose which Arrow's data type to use for the ClickHouse String data type - String or Binary. While Binary would be more correct and compatible, using String by default will correspond to user expectations in most cases."}, - {"output_format_orc_string_as_string", false, true, "ClickHouse allows arbitrary binary data in the String data type, which is typically UTF-8. Parquet/ORC/Arrow Strings only support UTF-8. That's why you can choose which Arrow's data type to use for the ClickHouse String data type - String or Binary. While Binary would be more correct and compatible, using String by default will correspond to user expectations in most cases."}, - {"output_format_arrow_string_as_string", false, true, "ClickHouse allows arbitrary binary data in the String data type, which is typically UTF-8. Parquet/ORC/Arrow Strings only support UTF-8. That's why you can choose which Arrow's data type to use for the ClickHouse String data type - String or Binary. 
While Binary would be more correct and compatible, using String by default will correspond to user expectations in most cases."}, - {"output_format_parquet_compression_method", "lz4", "zstd", "Parquet/ORC/Arrow support many compression methods, including lz4 and zstd. ClickHouse supports each and every compression method. Some inferior tools, such as 'duckdb', lack support for the faster `lz4` compression method, that's why we set zstd by default."}, - {"output_format_orc_compression_method", "lz4", "zstd", "Parquet/ORC/Arrow support many compression methods, including lz4 and zstd. ClickHouse supports each and every compression method. Some inferior tools, such as 'duckdb', lack support for the faster `lz4` compression method, that's why we set zstd by default."}, - {"output_format_pretty_highlight_digit_groups", false, true, "If enabled and if output is a terminal, highlight every digit corresponding to the number of thousands, millions, etc. with underline."}, - {"geo_distance_returns_float64_on_float64_arguments", false, true, "Increase the default precision."}, - {"azure_max_inflight_parts_for_one_file", 20, 20, "The maximum number of a concurrent loaded parts in multipart upload request. 0 means unlimited."}, - {"azure_strict_upload_part_size", 0, 0, "The exact size of part to upload during multipart upload to Azure blob storage."}, - {"azure_min_upload_part_size", 16*1024*1024, 16*1024*1024, "The minimum size of part to upload during multipart upload to Azure blob storage."}, - {"azure_max_upload_part_size", 5ull*1024*1024*1024, 5ull*1024*1024*1024, "The maximum size of part to upload during multipart upload to Azure blob storage."}, - {"azure_upload_part_size_multiply_factor", 2, 2, "Multiply azure_min_upload_part_size by this factor each time azure_multiply_parts_count_threshold parts were uploaded from a single write to Azure blob storage."}, - {"azure_upload_part_size_multiply_parts_count_threshold", 500, 500, "Each time this number of parts was uploaded to Azure blob storage, azure_min_upload_part_size is multiplied by azure_upload_part_size_multiply_factor."}, - {"output_format_csv_serialize_tuple_into_separate_columns", true, true, "A new way of how interpret tuples in CSV format was added."}, - {"input_format_csv_deserialize_separate_columns_into_tuple", true, true, "A new way of how interpret tuples in CSV format was added."}, - {"input_format_csv_try_infer_strings_from_quoted_tuples", true, true, "A new way of how interpret tuples in CSV format was added."}, - }}, - {"24.2", {{"allow_suspicious_variant_types", true, false, "Don't allow creating Variant type with suspicious variants by default"}, - {"validate_experimental_and_suspicious_types_inside_nested_types", false, true, "Validate usage of experimental and suspicious types inside nested types"}, - {"output_format_values_escape_quote_with_quote", false, false, "If true escape ' with '', otherwise quoted with \\'"}, - {"output_format_pretty_single_large_number_tip_threshold", 0, 1'000'000, "Print a readable number tip on the right side of the table if the block consists of a single number which exceeds this value (except 0)"}, - {"input_format_try_infer_exponent_floats", true, false, "Don't infer floats in exponential notation by default"}, - {"query_plan_optimize_prewhere", true, true, "Allow to push down filter to PREWHERE expression for supported storages"}, - {"async_insert_max_data_size", 1000000, 10485760, "The previous value appeared to be too small."}, - {"async_insert_poll_timeout_ms", 10, 10, "Timeout in 
milliseconds for polling data from asynchronous insert queue"}, - {"async_insert_use_adaptive_busy_timeout", false, true, "Use adaptive asynchronous insert timeout"}, - {"async_insert_busy_timeout_min_ms", 50, 50, "The minimum value of the asynchronous insert timeout in milliseconds; it also serves as the initial value, which may be increased later by the adaptive algorithm"}, - {"async_insert_busy_timeout_max_ms", 200, 200, "The minimum value of the asynchronous insert timeout in milliseconds; async_insert_busy_timeout_ms is aliased to async_insert_busy_timeout_max_ms"}, - {"async_insert_busy_timeout_increase_rate", 0.2, 0.2, "The exponential growth rate at which the adaptive asynchronous insert timeout increases"}, - {"async_insert_busy_timeout_decrease_rate", 0.2, 0.2, "The exponential growth rate at which the adaptive asynchronous insert timeout decreases"}, - {"format_template_row_format", "", "", "Template row format string can be set directly in query"}, - {"format_template_resultset_format", "", "", "Template result set format string can be set in query"}, - {"split_parts_ranges_into_intersecting_and_non_intersecting_final", true, true, "Allow to split parts ranges into intersecting and non intersecting during FINAL optimization"}, - {"split_intersecting_parts_ranges_into_layers_final", true, true, "Allow to split intersecting parts ranges into layers during FINAL optimization"}, - {"azure_max_single_part_copy_size", 256*1024*1024, 256*1024*1024, "The maximum size of object to copy using single part copy to Azure blob storage."}, - {"min_external_table_block_size_rows", DEFAULT_INSERT_BLOCK_SIZE, DEFAULT_INSERT_BLOCK_SIZE, "Squash blocks passed to external table to specified size in rows, if blocks are not big enough"}, - {"min_external_table_block_size_bytes", DEFAULT_INSERT_BLOCK_SIZE * 256, DEFAULT_INSERT_BLOCK_SIZE * 256, "Squash blocks passed to external table to specified size in bytes, if blocks are not big enough."}, - {"parallel_replicas_prefer_local_join", true, true, "If true, and JOIN can be executed with parallel replicas algorithm, and all storages of right JOIN part are *MergeTree, local JOIN will be used instead of GLOBAL JOIN."}, - {"optimize_time_filter_with_preimage", true, true, "Optimize Date and DateTime predicates by converting functions into equivalent comparisons without conversions (e.g. toYear(col) = 2023 -> col >= '2023-01-01' AND col <= '2023-12-31')"}, - {"extract_key_value_pairs_max_pairs_per_row", 0, 0, "Max number of pairs that can be produced by the `extractKeyValuePairs` function. 
Used as a safeguard against consuming too much memory."}, - {"default_view_definer", "CURRENT_USER", "CURRENT_USER", "Allows to set default `DEFINER` option while creating a view"}, - {"default_materialized_view_sql_security", "DEFINER", "DEFINER", "Allows to set a default value for SQL SECURITY option when creating a materialized view"}, - {"default_normal_view_sql_security", "INVOKER", "INVOKER", "Allows to set default `SQL SECURITY` option while creating a normal view"}, - {"mysql_map_string_to_text_in_show_columns", false, true, "Reduce the configuration effort to connect ClickHouse with BI tools."}, - {"mysql_map_fixed_string_to_text_in_show_columns", false, true, "Reduce the configuration effort to connect ClickHouse with BI tools."}, - }}, - {"24.1", {{"print_pretty_type_names", false, true, "Better user experience."}, - {"input_format_json_read_bools_as_strings", false, true, "Allow to read bools as strings in JSON formats by default"}, - {"output_format_arrow_use_signed_indexes_for_dictionary", false, true, "Use signed indexes type for Arrow dictionaries by default as it's recommended"}, - {"allow_experimental_variant_type", false, false, "Add new experimental Variant type"}, - {"use_variant_as_common_type", false, false, "Allow to use Variant in if/multiIf if there is no common type"}, - {"output_format_arrow_use_64_bit_indexes_for_dictionary", false, false, "Allow to use 64 bit indexes type in Arrow dictionaries"}, - {"parallel_replicas_mark_segment_size", 128, 128, "Add new setting to control segment size in new parallel replicas coordinator implementation"}, - {"ignore_materialized_views_with_dropped_target_table", false, false, "Add new setting to allow to ignore materialized views with dropped target table"}, - {"output_format_compression_level", 3, 3, "Allow to change compression level in the query output"}, - {"output_format_compression_zstd_window_log", 0, 0, "Allow to change zstd window log in the query output when zstd compression is used"}, - {"enable_zstd_qat_codec", false, false, "Add new ZSTD_QAT codec"}, - {"enable_vertical_final", false, true, "Use vertical final by default"}, - {"output_format_arrow_use_64_bit_indexes_for_dictionary", false, false, "Allow to use 64 bit indexes type in Arrow dictionaries"}, - {"max_rows_in_set_to_optimize_join", 100000, 0, "Disable join optimization as it prevents from read in order optimization"}, - {"output_format_pretty_color", true, "auto", "Setting is changed to allow also for auto value, disabling ANSI escapes if output is not a tty"}, - {"function_visible_width_behavior", 0, 1, "We changed the default behavior of `visibleWidth` to be more precise"}, - {"max_estimated_execution_time", 0, 0, "Separate max_execution_time and max_estimated_execution_time"}, - {"iceberg_engine_ignore_schema_evolution", false, false, "Allow to ignore schema evolution in Iceberg table engine"}, - {"optimize_injective_functions_in_group_by", false, true, "Replace injective functions by it's arguments in GROUP BY section in analyzer"}, - {"update_insert_deduplication_token_in_dependent_materialized_views", false, false, "Allow to update insert deduplication token with table identifier during insert in dependent materialized views"}, - {"azure_max_unexpected_write_error_retries", 4, 4, "The maximum number of retries in case of unexpected errors during Azure blob storage write"}, - {"split_parts_ranges_into_intersecting_and_non_intersecting_final", false, true, "Allow to split parts ranges into intersecting and non intersecting during FINAL 
optimization"}, - {"split_intersecting_parts_ranges_into_layers_final", true, true, "Allow to split intersecting parts ranges into layers during FINAL optimization"}}}, - {"23.12", {{"allow_suspicious_ttl_expressions", true, false, "It is a new setting, and in previous versions the behavior was equivalent to allowing."}, - {"input_format_parquet_allow_missing_columns", false, true, "Allow missing columns in Parquet files by default"}, - {"input_format_orc_allow_missing_columns", false, true, "Allow missing columns in ORC files by default"}, - {"input_format_arrow_allow_missing_columns", false, true, "Allow missing columns in Arrow files by default"}}}, - {"23.11", {{"parsedatetime_parse_without_leading_zeros", false, true, "Improved compatibility with MySQL DATE_FORMAT/STR_TO_DATE"}}}, - {"23.9", {{"optimize_group_by_constant_keys", false, true, "Optimize group by constant keys by default"}, - {"input_format_json_try_infer_named_tuples_from_objects", false, true, "Try to infer named Tuples from JSON objects by default"}, - {"input_format_json_read_numbers_as_strings", false, true, "Allow to read numbers as strings in JSON formats by default"}, - {"input_format_json_read_arrays_as_strings", false, true, "Allow to read arrays as strings in JSON formats by default"}, - {"input_format_json_infer_incomplete_types_as_strings", false, true, "Allow to infer incomplete types as Strings in JSON formats by default"}, - {"input_format_json_try_infer_numbers_from_strings", true, false, "Don't infer numbers from strings in JSON formats by default to prevent possible parsing errors"}, - {"http_write_exception_in_output_format", false, true, "Output valid JSON/XML on exception in HTTP streaming."}}}, - {"23.8", {{"rewrite_count_distinct_if_with_count_distinct_implementation", false, true, "Rewrite countDistinctIf with count_distinct_implementation configuration"}}}, - {"23.7", {{"function_sleep_max_microseconds_per_block", 0, 3000000, "In previous versions, the maximum sleep time of 3 seconds was applied only for `sleep`, but not for `sleepEachRow` function. In the new version, we introduce this setting. If you set compatibility with the previous versions, we will disable the limit altogether."}}}, - {"23.6", {{"http_send_timeout", 180, 30, "3 minutes seems crazy long. Note that this is timeout for a single network write call, not for the whole upload operation."}, - {"http_receive_timeout", 180, 30, "See http_send_timeout."}}}, - {"23.5", {{"input_format_parquet_preserve_order", true, false, "Allow Parquet reader to reorder rows for better parallelism."}, - {"parallelize_output_from_storages", false, true, "Allow parallelism when executing queries that read from file/url/s3/etc. This may reorder rows."}, - {"use_with_fill_by_sorting_prefix", false, true, "Columns preceding WITH FILL columns in ORDER BY clause form sorting prefix. 
Rows with different values in sorting prefix are filled independently"}, - {"output_format_parquet_compliant_nested_types", false, true, "Change an internal field name in output Parquet file schema."}}}, - {"23.4", {{"allow_suspicious_indices", true, false, "If true, index can defined with identical expressions"}, - {"allow_nonconst_timezone_arguments", true, false, "Allow non-const timezone arguments in certain time-related functions like toTimeZone(), fromUnixTimestamp*(), snowflakeToDateTime*()."}, - {"connect_timeout_with_failover_ms", 50, 1000, "Increase default connect timeout because of async connect"}, - {"connect_timeout_with_failover_secure_ms", 100, 1000, "Increase default secure connect timeout because of async connect"}, - {"hedged_connection_timeout_ms", 100, 50, "Start new connection in hedged requests after 50 ms instead of 100 to correspond with previous connect timeout"}, - {"formatdatetime_f_prints_single_zero", true, false, "Improved compatibility with MySQL DATE_FORMAT()/STR_TO_DATE()"}, - {"formatdatetime_parsedatetime_m_is_month_name", false, true, "Improved compatibility with MySQL DATE_FORMAT/STR_TO_DATE"}}}, - {"23.3", {{"output_format_parquet_version", "1.0", "2.latest", "Use latest Parquet format version for output format"}, - {"input_format_json_ignore_unknown_keys_in_named_tuple", false, true, "Improve parsing JSON objects as named tuples"}, - {"input_format_native_allow_types_conversion", false, true, "Allow types conversion in Native input forma"}, - {"output_format_arrow_compression_method", "none", "lz4_frame", "Use lz4 compression in Arrow output format by default"}, - {"output_format_parquet_compression_method", "snappy", "lz4", "Use lz4 compression in Parquet output format by default"}, - {"output_format_orc_compression_method", "none", "lz4_frame", "Use lz4 compression in ORC output format by default"}, - {"async_query_sending_for_remote", false, true, "Create connections and send query async across shards"}}}, - {"23.2", {{"output_format_parquet_fixed_string_as_fixed_byte_array", false, true, "Use Parquet FIXED_LENGTH_BYTE_ARRAY type for FixedString by default"}, - {"output_format_arrow_fixed_string_as_fixed_byte_array", false, true, "Use Arrow FIXED_SIZE_BINARY type for FixedString by default"}, - {"query_plan_remove_redundant_distinct", false, true, "Remove redundant Distinct step in query plan"}, - {"optimize_duplicate_order_by_and_distinct", true, false, "Remove duplicate ORDER BY and DISTINCT if it's possible"}, - {"insert_keeper_max_retries", 0, 20, "Enable reconnections to Keeper on INSERT, improve reliability"}}}, - {"23.1", {{"input_format_json_read_objects_as_strings", 0, 1, "Enable reading nested json objects as strings while object type is experimental"}, - {"input_format_json_defaults_for_missing_elements_in_named_tuple", false, true, "Allow missing elements in JSON objects while reading named tuples by default"}, - {"input_format_csv_detect_header", false, true, "Detect header in CSV format by default"}, - {"input_format_tsv_detect_header", false, true, "Detect header in TSV format by default"}, - {"input_format_custom_detect_header", false, true, "Detect header in CustomSeparated format by default"}, - {"query_plan_remove_redundant_sorting", false, true, "Remove redundant sorting in query plan. 
For example, sorting steps related to ORDER BY clauses in subqueries"}}}, - {"22.12", {{"max_size_to_preallocate_for_aggregation", 10'000'000, 100'000'000, "This optimizes performance"}, - {"query_plan_aggregation_in_order", 0, 1, "Enable some refactoring around query plan"}, - {"format_binary_max_string_size", 0, 1_GiB, "Prevent allocating large amount of memory"}}}, - {"22.11", {{"use_structure_from_insertion_table_in_table_functions", 0, 2, "Improve using structure from insertion table in table functions"}}}, - {"22.9", {{"force_grouping_standard_compatibility", false, true, "Make GROUPING function output the same as in SQL standard and other DBMS"}}}, - {"22.7", {{"cross_to_inner_join_rewrite", 1, 2, "Force rewrite comma join to inner"}, - {"enable_positional_arguments", false, true, "Enable positional arguments feature by default"}, - {"format_csv_allow_single_quotes", true, false, "Most tools don't treat single quote in CSV specially, don't do it by default too"}}}, - {"22.6", {{"output_format_json_named_tuples_as_objects", false, true, "Allow to serialize named tuples as JSON objects in JSON formats by default"}, - {"input_format_skip_unknown_fields", false, true, "Optimize reading subset of columns for some input formats"}}}, - {"22.5", {{"memory_overcommit_ratio_denominator", 0, 1073741824, "Enable memory overcommit feature by default"}, - {"memory_overcommit_ratio_denominator_for_user", 0, 1073741824, "Enable memory overcommit feature by default"}}}, - {"22.4", {{"allow_settings_after_format_in_insert", true, false, "Do not allow SETTINGS after FORMAT for INSERT queries because ClickHouse interpret SETTINGS as some values, which is misleading"}}}, - {"22.3", {{"cast_ipv4_ipv6_default_on_conversion_error", true, false, "Make functions cast(value, 'IPv4') and cast(value, 'IPv6') behave same as toIPv4 and toIPv6 functions"}}}, - {"21.12", {{"stream_like_engine_allow_direct_select", true, false, "Do not allow direct select for Kafka/RabbitMQ/FileLog by default"}}}, - {"21.9", {{"output_format_decimal_trailing_zeros", true, false, "Do not output trailing zeros in text representation of Decimal types by default for better looking output"}, - {"use_hedged_requests", false, true, "Enable Hedged Requests feature by default"}}}, - {"21.7", {{"legacy_column_name_of_tuple_literal", true, false, "Add this setting only for compatibility reasons. It makes sense to set to 'true', while doing rolling update of cluster from version lower than 21.7 to higher"}}}, - {"21.5", {{"async_socket_for_remote", false, true, "Fix all problems and turn on asynchronous reads from socket for remote queries by default again"}}}, - {"21.3", {{"async_socket_for_remote", true, false, "Turn off asynchronous reads from socket for remote queries because of some problems"}, - {"optimize_normalize_count_variants", false, true, "Rewrite aggregate functions that semantically equals to count() as count() by default"}, - {"normalize_function_names", false, true, "Normalize function names to their canonical names, this was needed for projection query routing"}}}, - {"21.2", {{"enable_global_with_statement", false, true, "Propagate WITH statements to UNION queries and all subqueries by default"}}}, - {"21.1", {{"insert_quorum_parallel", false, true, "Use parallel quorum inserts by default. 
It is significantly more convenient to use than sequential quorum inserts"}, - {"input_format_null_as_default", false, true, "Allow to insert NULL as default for input formats by default"}, - {"optimize_on_insert", false, true, "Enable data optimization on INSERT by default for better user experience"}, - {"use_compact_format_in_distributed_parts_names", false, true, "Use compact format for async INSERT into Distributed tables by default"}}}, - {"20.10", {{"format_regexp_escaping_rule", "Escaped", "Raw", "Use Raw as default escaping rule for Regexp format to male the behaviour more like to what users expect"}}}, - {"20.7", {{"show_table_uuid_in_table_create_query_if_not_nil", true, false, "Stop showing UID of the table in its CREATE query for Engine=Atomic"}}}, - {"20.5", {{"input_format_with_names_use_header", false, true, "Enable using header with names for formats with WithNames/WithNamesAndTypes suffixes"}, - {"allow_suspicious_codecs", true, false, "Don't allow to specify meaningless compression codecs"}}}, - {"20.4", {{"validate_polygons", false, true, "Throw exception if polygon is invalid in function pointInPolygon by default instead of returning possibly wrong results"}}}, - {"19.18", {{"enable_scalar_subquery_optimization", false, true, "Prevent scalar subqueries from (de)serializing large scalar values and possibly avoid running the same subquery more than once"}}}, - {"19.14", {{"any_join_distinct_right_table_keys", true, false, "Disable ANY RIGHT and ANY FULL JOINs by default to avoid inconsistency"}}}, - {"19.12", {{"input_format_defaults_for_omitted_fields", false, true, "Enable calculation of complex default expressions for omitted fields for some input formats, because it should be the expected behaviour"}}}, - {"19.5", {{"max_partitions_per_insert_block", 0, 100, "Add a limit for the number of partitions in one block"}}}, - {"18.12.17", {{"enable_optimize_predicate_expression", 0, 1, "Optimize predicates to subqueries by default"}}}, {"24.12", { } @@ -335,6 +75,7 @@ static std::initializer_list Date: Sun, 4 Aug 2024 09:02:19 +0200 Subject: [PATCH 653/661] Use RabbitMQ without management in tests (attempt to improve startup) Sometimes startup can take ~90 seconds [1]: 2024-08-03 23:11:38.756067+00:00 [info] <0.9.0> Time to start RabbitMQ: 94651980 us Unlike normally ~10 [2]: 2024-08-04 00:33:07.016137+00:00 [info] <0.9.0> Time to start RabbitMQ: 10082489 us [1]: https://s3.amazonaws.com/clickhouse-test-reports/67737/b4e3bbcb82158bea4f5db1d9f5c28cfb741d1d51/integration_tests__asan__old_analyzer__[4_6].html [2]: https://s3.amazonaws.com/clickhouse-test-reports/66671/2f00c962711e13ca00af324366421fe4593b4ce6/integration_tests__asan__old_analyzer__[4_6].html I've tried locally, the difference is very small, 3135665 us (+management) vs (2740747 us), but still something, and who knows how it works under pressure. 
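(Illustrative aside, not part of the patch: one way to pull the same "Time to start RabbitMQ" line out of a running container's log when comparing the two images; the container/service name here is an assumption.)

# Grep the startup-time line quoted above from the RabbitMQ container log.
# "rabbitmq1" is the compose service name used below; the real container name in CI may differ.
docker logs rabbitmq1 2>&1 | grep -F 'Time to start RabbitMQ'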
Signed-off-by: Azat Khuzhin --- tests/integration/compose/docker_compose_rabbitmq.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/compose/docker_compose_rabbitmq.yml b/tests/integration/compose/docker_compose_rabbitmq.yml index 61b21e0e3d9..94c7f0111c4 100644 --- a/tests/integration/compose/docker_compose_rabbitmq.yml +++ b/tests/integration/compose/docker_compose_rabbitmq.yml @@ -2,7 +2,7 @@ version: '2.3' services: rabbitmq1: - image: rabbitmq:3.12.6-management-alpine + image: rabbitmq:3.12.6-alpine hostname: rabbitmq1 expose: - ${RABBITMQ_PORT:-5672} From 264be9c598b42d91ee0a19f718c4d9a4291c7bc4 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sun, 4 Aug 2024 09:06:50 +0200 Subject: [PATCH 654/661] tests: increase timeout for RabbitMQ startup Signed-off-by: Azat Khuzhin --- tests/integration/helpers/cluster.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index 9259c720ff0..a1bdee33d57 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -2371,7 +2371,7 @@ class ClickHouseCluster: time.sleep(0.5) raise Exception("Cannot wait PostgreSQL Java Client container") - def wait_rabbitmq_to_start(self, timeout=30): + def wait_rabbitmq_to_start(self, timeout=60): self.print_all_docker_pieces() self.rabbitmq_ip = self.get_instance_ip(self.rabbitmq_host) From dc527b6fd1dfdffb1d177237bffc69cd110cd2a7 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sun, 4 Aug 2024 09:07:05 +0200 Subject: [PATCH 655/661] tests: detailed errors for RabbitMQ startup Signed-off-by: Azat Khuzhin --- tests/integration/helpers/cluster.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index a1bdee33d57..56d111629c9 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -2399,7 +2399,7 @@ class ClickHouseCluster: ) rabbitmq_debuginfo(self.rabbitmq_docker_id, self.rabbitmq_cookie) except Exception as e: - logging.debug("Unable to get logs from docker.") + logging.debug(f"Unable to get logs from docker: {e}.") raise Exception("Cannot wait RabbitMQ container") def wait_nats_is_available(self, max_retries=5): From f0aaac3bd19c21a796f54ba080fd67f92959131a Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sun, 4 Aug 2024 14:14:28 +0200 Subject: [PATCH 656/661] tests: remove useless retries from test_ttl_move::test_alter_with_merge_work Signed-off-by: Azat Khuzhin --- tests/integration/test_ttl_move/test.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/integration/test_ttl_move/test.py b/tests/integration/test_ttl_move/test.py index 3b79ea7916d..48a6224347d 100644 --- a/tests/integration/test_ttl_move/test.py +++ b/tests/integration/test_ttl_move/test.py @@ -76,7 +76,7 @@ def get_used_disks_for_table(node, table_name, partition=None): ) -def check_used_disks_with_retry(node, table_name, expected_disks, retries): +def check_used_disks_with_retry(node, table_name, expected_disks, retries=1): for _ in range(retries): used_disks = get_used_disks_for_table(node, table_name) if set(used_disks).issubset(expected_disks): @@ -1635,9 +1635,9 @@ def test_alter_with_merge_work(started_cluster, name, engine, positive): optimize_table(20) if positive: - assert check_used_disks_with_retry(node1, name, set(["external"]), 100) + assert check_used_disks_with_retry(node1, name, set(["external"])) else: - 
assert check_used_disks_with_retry(node1, name, set(["jbod1", "jbod2"]), 50) + assert check_used_disks_with_retry(node1, name, set(["jbod1", "jbod2"])) time.sleep(5) From 47dbc5e05b12213a08c25ade9536603a3fd2b175 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sun, 4 Aug 2024 14:16:17 +0200 Subject: [PATCH 657/661] tests: add debug info into test_ttl_move::test_alter_with_merge_work Signed-off-by: Azat Khuzhin --- tests/integration/test_ttl_move/test.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/tests/integration/test_ttl_move/test.py b/tests/integration/test_ttl_move/test.py index 48a6224347d..4ebe9a30699 100644 --- a/tests/integration/test_ttl_move/test.py +++ b/tests/integration/test_ttl_move/test.py @@ -1635,9 +1635,17 @@ def test_alter_with_merge_work(started_cluster, name, engine, positive): optimize_table(20) if positive: - assert check_used_disks_with_retry(node1, name, set(["external"])) + assert check_used_disks_with_retry( + node1, name, set(["external"]) + ), "Parts: " + node1.query( + f"SELECT disk_name, name FROM system.parts WHERE table = '{name}' AND active = 1" + ) else: - assert check_used_disks_with_retry(node1, name, set(["jbod1", "jbod2"])) + assert check_used_disks_with_retry( + node1, name, set(["jbod1", "jbod2"]) + ), "Parts: " + node1.query( + f"SELECT disk_name, name FROM system.parts WHERE table = '{name}' AND active = 1" + ) time.sleep(5) From 062490e1b40a8df8d63fca567b11e7dd26cf52ee Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sun, 4 Aug 2024 14:22:24 +0200 Subject: [PATCH 658/661] tests: fix test_ttl_move::test_alter_with_merge_work flakiness MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Increase timeout for TTL DELETE, since otherwise if other routines will take too long, the part will be removed when it should be still be on "external" disk: 2024.08.04 03:48:53.803032 [ 622 ] {} default.mt_test_alter_with_merge_work_1722743323 (9dc6904a-f082-4f06-be7a-efe4733e811c): Will drop empty part all_1_3_4_4 And this is how part_log looks like: SELECT event_time, event_type, rows, part_name, error, database, disk_name FROM system.part_log WHERE `table` = 'mt_test_alter_with_merge_work_1722743323' ORDER BY event_time ASC Query id: a118b3cd-e4fe-45a5-b675-d73bdd887d79 ┌──────────event_time─┬─event_type─┬─rows─┬─part_name───┬─error─┬─database─┬─disk_name─┐ 1. │ 2024-08-04 03:48:44 │ NewPart │ 2 │ all_1_1_0 │ 0 │ default │ jbod1 │ 2. │ 2024-08-04 03:48:44 │ NewPart │ 2 │ all_2_2_0 │ 0 │ default │ jbod2 │ 3. │ 2024-08-04 03:48:45 │ NewPart │ 2 │ all_3_3_0 │ 0 │ default │ jbod1 │ 4. │ 2024-08-04 03:48:46 │ MutatePart │ 2 │ all_1_1_0_4 │ 0 │ default │ jbod1 │ 5. │ 2024-08-04 03:48:46 │ MutatePart │ 2 │ all_2_2_0_4 │ 0 │ default │ jbod2 │ 6. │ 2024-08-04 03:48:46 │ MutatePart │ 2 │ all_3_3_0_4 │ 0 │ default │ jbod1 │ 7. │ 2024-08-04 03:48:47 │ MovePart │ 2 │ all_1_1_0_4 │ 0 │ default │ external │ 8. │ 2024-08-04 03:48:47 │ MovePart │ 2 │ all_3_3_0_4 │ 0 │ default │ jbod2 │ 9. │ 2024-08-04 03:48:47 │ MergeParts │ 6 │ all_1_3_1_4 │ 0 │ default │ jbod2 │ 10. │ 2024-08-04 03:48:48 │ MovePart │ 6 │ all_1_3_1_4 │ 0 │ default │ external │ 11. │ 2024-08-04 03:48:52 │ MergeParts │ 4 │ all_1_3_2_4 │ 0 │ default │ external │ 12. │ 2024-08-04 03:48:53 │ MergeParts │ 0 │ all_1_3_3_4 │ 0 │ default │ external │ # rows==0 13. 
│ 2024-08-04 03:48:53 │ MergeParts │ 0 │ all_1_3_4_4 │ 0 │ default │ external │ └─────────────────────┴────────────┴──────┴─────────────┴───────┴──────────┴───────────┘ CI: https://s3.amazonaws.com/clickhouse-test-reports/66671/2f00c962711e13ca00af324366421fe4593b4ce6/integration_tests__tsan__[5_6].html Signed-off-by: Azat Khuzhin --- tests/integration/test_ttl_move/test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/integration/test_ttl_move/test.py b/tests/integration/test_ttl_move/test.py index 4ebe9a30699..925bdf9baaa 100644 --- a/tests/integration/test_ttl_move/test.py +++ b/tests/integration/test_ttl_move/test.py @@ -1613,7 +1613,7 @@ def test_alter_with_merge_work(started_cluster, name, engine, positive): ALTER TABLE {name} MODIFY TTL d1 + INTERVAL 0 SECOND TO DISK 'jbod2', d1 + INTERVAL 5 SECOND TO VOLUME 'external', - d1 + INTERVAL 10 SECOND DELETE + d1 + INTERVAL 30 SECOND DELETE """.format( name=name ) @@ -1647,7 +1647,7 @@ def test_alter_with_merge_work(started_cluster, name, engine, positive): f"SELECT disk_name, name FROM system.parts WHERE table = '{name}' AND active = 1" ) - time.sleep(5) + time.sleep(25) optimize_table(20) From 9f31488e502c2b2c02e3058f9794829aac14f8b9 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sun, 4 Aug 2024 15:43:18 +0200 Subject: [PATCH 659/661] Fix dictionary hang in case of CANNOT_SCHEDULE_TASK while loading On CI you can find that 01747_executable_pool_dictionary_implicit_key can hang [1], it is possible due to after CANNOT_SCHEDULE_TASK the async loading will hang: 2024.07.18 03:56:32.365226 [ 6138 ] {6206a18f-668c-4a5c-a5ad-07f577220762} ExternalDictionariesLoader: Will load the object 'executable_pool_simple_implicit_key' in background, force = false, loading_id = 2 2024.07.18 03:56:32.368005 [ 6138 ] {6206a18f-668c-4a5c-a5ad-07f577220762} executeQuery: Code: 439. DB::Exception: Cannot schedule a task: fault injected (threads=766, jobs=746): In scope SELECT dictGet('executable_pool_simple_implicit_key', 'a', toUInt64(1)). (CANNOT_SCHEDULE_TASK) (version 24.7.1.2241) (from [::1]:56446) (comment: 01747_executable_pool_dictionary_implicit_key.sql) (in query: SELECT dictGet('executable_pool_simple_implicit_key', 'a', toUInt64(1));), Stack trace (when copying this message, always include the lines below): 0. /build/contrib/llvm-project/libcxx/include/exception:141: Poco::Exception::Exception(String const&, int) @ 0x0000000015f8a292 1. /build/src/Common/Exception.cpp:110: DB::Exception::Exception(DB::Exception::MessageMasked&&, int, bool) @ 0x000000000c3df6b9 2. /build/contrib/llvm-project/libcxx/include/string:1499: DB::Exception::Exception(PreformattedMessage&&, int) @ 0x0000000006de714c 3. /build/contrib/llvm-project/libcxx/include/vector:438: DB::Exception::Exception(int, FormatStringHelperImpl::type, std::type_identity::type, std::type_identity::type>, String const&, unsigned long&&, unsigned long&) @ 0x000000000c4838eb 4. /build/src/Common/ThreadPool.cpp:0: void ThreadPoolImpl::scheduleImpl(std::function, Priority, std::optional, bool)::'lambda'(String const&)::operator()(String const&) const @ 0x000000000c4832d3 5. /build/src/Common/ThreadPool.cpp:186: void ThreadPoolImpl::scheduleImpl(std::function, Priority, std::optional, bool) @ 0x000000000c47e7db 6. /build/contrib/llvm-project/libcxx/include/__functional/function.h:818: ? @ 0x000000000c47ec8d 7. /build/contrib/llvm-project/libcxx/include/__functional/function.h:818: ? @ 0x000000001114b16e 8. 
/build/contrib/llvm-project/libcxx/include/__memory/shared_ptr.h:701: DB::ExternalLoader::LoadingDispatcher::startLoading(DB::ExternalLoader::LoadingDispatcher::Info&, bool, unsigned long) @ 0x0000000011147733 9. /build/src/Interpreters/ExternalLoader.cpp:837: DB::ExternalLoader::LoadingDispatcher::loadImpl(String const&, std::chrono::duration>, bool, std::unique_lock&)::'lambda'()::operator()() const @ 0x0000000011158bf9 10. /build/contrib/llvm-project/libcxx/include/__mutex_base:397: DB::ExternalLoader::LoadingDispatcher::loadImpl(String const&, std::chrono::duration>, bool, std::unique_lock&) @ 0x00000000111588bc 11. /build/src/Interpreters/ExternalLoader.cpp:604: DB::ExternalLoader::LoadResult DB::ExternalLoader::LoadingDispatcher::tryLoad(String const&, std::chrono::duration>) @ 0x00000000111440bf 12. /build/src/Interpreters/ExternalLoader.cpp:1381: std::shared_ptr DB::ExternalLoader::load, void>(String const&) const @ 0x00000000111442f5 13. /build/contrib/llvm-project/libcxx/include/__memory/shared_ptr.h:587: DB::ExternalDictionariesLoader::getDictionary(String const&, std::shared_ptr) const @ 0x0000000011141028 14. /build/src/Functions/FunctionsExternalDictionaries.h:76: DB::FunctionDictHelper::getDictionary(String const&) @ 0x00000000071d28ec ... 2024.07.18 03:58:29.000900 [ 48468 ] {8cf63d7e-dcbf-4af6-bd7c-0e1789ddce3b} executeQuery: (from [::1]:40410) (comment: 01747_executable_pool_dictionary_implicit_key.sql) SELECT dictGet('executable_pool_simple_implicit_key', 'a', toUInt64(1)); (stage: Complete) # and no more rows for 8cf63d7e-dcbf-4af6-bd7c-0e1789ddce3b [1]: https://s3.amazonaws.com/clickhouse-test-reports/66495/bc029ed8207ac75e96e9cb48cb79d27a9ffa4e2f/stress_test__debug_.html The problem is that the loading must be properly cancelled; otherwise it will never be loaded in loadImpl(), but will be waited for. Signed-off-by: Azat Khuzhin --- src/Interpreters/ExternalLoader.cpp | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/Interpreters/ExternalLoader.cpp b/src/Interpreters/ExternalLoader.cpp index 96405f35f3f..511300be2e0 100644 --- a/src/Interpreters/ExternalLoader.cpp +++ b/src/Interpreters/ExternalLoader.cpp @@ -922,7 +922,16 @@ private: if (enable_async_loading) { /// Put a job to the thread pool for the loading. - auto thread = ThreadFromGlobalPool{&LoadingDispatcher::doLoading, this, info.name, loading_id, forced_to_reload, min_id_to_finish_loading_dependencies_, true, CurrentThread::getGroup()}; + ThreadFromGlobalPool thread; + try + { + thread = ThreadFromGlobalPool{&LoadingDispatcher::doLoading, this, info.name, loading_id, forced_to_reload, min_id_to_finish_loading_dependencies_, true, CurrentThread::getGroup()}; + } + catch (...) + { + cancelLoading(info); + throw; + } loading_threads.try_emplace(loading_id, std::move(thread)); } else From c200f437746ddbd03e5ce7c7ad9d9613015ce929 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sun, 4 Aug 2024 16:00:46 +0200 Subject: [PATCH 660/661] ci: fix stateless runner to correctly gather artifacts when server failed Right now it simply does not work because of "set -e": with it you cannot use "foo=$(false)", since bash aborts execution right after the failing assignment. Rewrite it in plain shell, with "if".
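For illustration only (not part of the patch): a minimal sketch of the "set -e" pitfall described above, with `false` standing in for a failing clickhouse-client call.

#!/usr/bin/env bash
set -e
failed_to_save_logs=0

# Old pattern: under "set -e" the failing command substitution terminates the whole
# script right here, so failed_to_save_logs is never set and the remaining artifact
# collection never runs:
#   err=$(false)
#   [[ "0" != "${#err}" ]] && failed_to_save_logs=1

# New pattern from the patch: the failure is handled locally and the script goes on.
if ! false; then
    failed_to_save_logs=1
fi
echo "failed_to_save_logs=$failed_to_save_logs"   # prints 1; the script is still running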
Also use ZSTD everywhere (ugh) Signed-off-by: Azat Khuzhin --- docker/test/stateless/run.sh | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh index da17b82d91b..5c15c05652b 100755 --- a/docker/test/stateless/run.sh +++ b/docker/test/stateless/run.sh @@ -299,22 +299,22 @@ stop_logs_replication failed_to_save_logs=0 for table in query_log zookeeper_log trace_log transactions_info_log metric_log blob_storage_log error_log do - err=$(clickhouse-client -q "select * from system.$table into outfile '/test_output/$table.tsv.gz' format TSVWithNamesAndTypes") - echo "$err" - [[ "0" != "${#err}" ]] && failed_to_save_logs=1 + if ! clickhouse-client -q "select * from system.$table into outfile '/test_output/$table.tsv.zst' format TSVWithNamesAndTypes"; then + failed_to_save_logs=1 + fi if [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then - err=$( { clickhouse-client --port 19000 -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.1.tsv.zst; } 2>&1 ) - echo "$err" - [[ "0" != "${#err}" ]] && failed_to_save_logs=1 - err=$( { clickhouse-client --port 29000 -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.2.tsv.zst; } 2>&1 ) - echo "$err" - [[ "0" != "${#err}" ]] && failed_to_save_logs=1 + if ! clickhouse-client --port 19000 -q "select * from system.$table into outfile '/test_output/$table.1.tsv.zst' format TSVWithNamesAndTypes"; then + failed_to_save_logs=1 + fi + if ! clickhouse-client --port 29000 -q "select * from system.$table into outfile '/test_output/$table.2.tsv.zst' format TSVWithNamesAndTypes"; then + failed_to_save_logs=1 + fi fi if [[ "$USE_SHARED_CATALOG" -eq 1 ]]; then - err=$( { clickhouse-client --port 19000 -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.1.tsv.zst; } 2>&1 ) - echo "$err" - [[ "0" != "${#err}" ]] && failed_to_save_logs=1 + if ! 
clickhouse-client --port 29000 -q "select * from system.$table into outfile '/test_output/$table.2.tsv.zst' format TSVWithNamesAndTypes"; then + failed_to_save_logs=1 + fi fi done From 43cf85ef2841190a5d12a592a65a5181bb110661 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sun, 4 Aug 2024 16:09:48 +0200 Subject: [PATCH 661/661] ci: collect basic issues in stateless tests (dmesg, fatal and similar) Signed-off-by: Azat Khuzhin --- docker/test/stateless/run.sh | 9 +++++++++ docker/test/stateless/stress_tests.lib | 2 +- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh index 5c15c05652b..b352539cc1a 100755 --- a/docker/test/stateless/run.sh +++ b/docker/test/stateless/run.sh @@ -3,6 +3,12 @@ # shellcheck disable=SC1091 source /setup_export_logs.sh +# shellcheck source=../stateless/stress_tests.lib +source /stress_tests.lib + +# Avoid overlaps with previous runs +dmesg --clear + # fail on errors, verbose and export all env variables set -e -x -a @@ -420,4 +426,7 @@ if [[ "$USE_SHARED_CATALOG" -eq 1 ]]; then tar -chf /test_output/coordination1.tar /var/lib/clickhouse1/coordination ||: fi +# Grep logs for sanitizer asserts, crashes and other critical errors +check_logs_for_critical_errors + collect_core_dumps diff --git a/docker/test/stateless/stress_tests.lib b/docker/test/stateless/stress_tests.lib index 682da1df837..36782101fa7 100644 --- a/docker/test/stateless/stress_tests.lib +++ b/docker/test/stateless/stress_tests.lib @@ -242,7 +242,7 @@ function check_server_start() function check_logs_for_critical_errors() { # Sanitizer asserts - sed -n '/WARNING:.*anitizer/,/^$/p' /var/log/clickhouse-server/stderr.log >> /test_output/tmp + sed -n '/WARNING:.*anitizer/,/^$/p' /var/log/clickhouse-server/stderr*.log >> /test_output/tmp rg -Fav -e "ASan doesn't fully support makecontext/swapcontext functions" -e "DB::Exception" /test_output/tmp > /dev/null \ && echo -e "Sanitizer assert (in stderr.log)$FAIL$(head_escaped /test_output/tmp)" >> /test_output/test_results.tsv \ || echo -e "No sanitizer asserts$OK" >> /test_output/test_results.tsv
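As a closing illustration (a self-contained sketch; the log contents are made up), this is what the `sed -n '/WARNING:.*anitizer/,/^$/p'` range used above extracts: everything from the first sanitizer warning down to the next empty line.

#!/usr/bin/env bash
log=$(mktemp)
cat > "$log" <<'EOF'
2024.08.04 16:00:00 some unrelated server line
==123==WARNING: MemorySanitizer: use-of-uninitialized-value
    #0 0xdeadbeef in foo()
    #1 0xcafebabe in main

2024.08.04 16:00:01 another unrelated line
EOF
# Prints the warning line, the two stack frames and the terminating empty line, nothing else.
sed -n '/WARNING:.*anitizer/,/^$/p' "$log"
rm -f "$log"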