From ec17e28a29ab47ab54ce805d3c01e933afc4c0d0 Mon Sep 17 00:00:00 2001
From: Antonio Andelic
Date: Wed, 7 Dec 2022 07:47:16 +0000
Subject: [PATCH 01/35] Add classes for new pass

---
 .../Passes/IfTransformStringsToEnumPass.cpp | 12 +++++++++++
 .../Passes/IfTransformStringsToEnumPass.h   | 21 +++++++++++++++++++
 .../ConvertStringsToEnumVisitor.cpp         |  2 +-
 3 files changed, 34 insertions(+), 1 deletion(-)
 create mode 100644 src/Analyzer/Passes/IfTransformStringsToEnumPass.cpp
 create mode 100644 src/Analyzer/Passes/IfTransformStringsToEnumPass.h

diff --git a/src/Analyzer/Passes/IfTransformStringsToEnumPass.cpp b/src/Analyzer/Passes/IfTransformStringsToEnumPass.cpp
new file mode 100644
index 00000000000..4dcfcaf61a7
--- /dev/null
+++ b/src/Analyzer/Passes/IfTransformStringsToEnumPass.cpp
@@ -0,0 +1,12 @@
+#include <Analyzer/Passes/IfTransformStringsToEnumPass.h>
+#include "Analyzer/IQueryTreeNode.h"
+#include "Interpreters/Context_fwd.h"
+
+namespace DB
+{
+
+void IfTransformStringsToEnumPass::run(QueryTreeNodePtr, ContextPtr)
+{
+}
+
+}
diff --git a/src/Analyzer/Passes/IfTransformStringsToEnumPass.h b/src/Analyzer/Passes/IfTransformStringsToEnumPass.h
new file mode 100644
index 00000000000..03456bbbb2e
--- /dev/null
+++ b/src/Analyzer/Passes/IfTransformStringsToEnumPass.h
@@ -0,0 +1,21 @@
+#pragma once
+
+#include <Analyzer/IQueryTreePass.h>
+
+namespace DB
+{
+
+/**
+ * This pass replaces string-type arguments in If and Transform to enum.
+ */
+class IfTransformStringsToEnumPass final : public IQueryTreePass
+{
+public:
+    String getName() override { return "IfTransformStringsToEnumPass"; }
+
+    String getDescription() override { return "Replaces string-type arguments in If and Transform to enum"; }
+
+    void run(QueryTreeNodePtr query_tree_node, ContextPtr context) override;
+};
+
+}
diff --git a/src/Interpreters/ConvertStringsToEnumVisitor.cpp b/src/Interpreters/ConvertStringsToEnumVisitor.cpp
index 745e484022c..b141f75fbec 100644
--- a/src/Interpreters/ConvertStringsToEnumVisitor.cpp
+++ b/src/Interpreters/ConvertStringsToEnumVisitor.cpp
@@ -141,7 +141,7 @@ void ConvertStringsToEnumMatcher::visit(ASTFunction & function_node, Data & data

     if (function_node.name == "if")
     {
-        if (function_node.arguments->children.size() != 2)
+        if (function_node.arguments->children.size() != 3)
             return;

         const ASTLiteral * literal1 = function_node.arguments->children[1]->as<ASTLiteral>();
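The one-line change to ConvertStringsToEnumVisitor.cpp above fixes the argument-count guard in the legacy AST-based optimization: if() carries three arguments (condition, then-branch, else-branch), so the old `!= 2` check made the visitor skip every well-formed call. A minimal query of the shape the corrected guard accepts, taken from the tests added later in this series:

    SELECT if(number > 5, 'censor.net', 'google')
    FROM system.numbers
    LIMIT 10;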
From 23c040d3596a2f531ff2f70c23f2cf4be0ce1122 Mon Sep 17 00:00:00 2001
From: Antonio Andelic
Date: Wed, 7 Dec 2022 11:44:00 +0000
Subject: [PATCH 02/35] Finish optimize_if_transform_strings_to_enum

---
 .../Passes/IfTransformStringsToEnumPass.cpp | 300 +++++++++++++++++-
 src/Analyzer/QueryTreePassManager.cpp       |   6 +-
 src/Parsers/parseQuery.cpp                  |   1 +
 3 files changed, 303 insertions(+), 4 deletions(-)

diff --git a/src/Analyzer/Passes/IfTransformStringsToEnumPass.cpp b/src/Analyzer/Passes/IfTransformStringsToEnumPass.cpp
index 4dcfcaf61a7..51defd8de1f 100644
--- a/src/Analyzer/Passes/IfTransformStringsToEnumPass.cpp
+++ b/src/Analyzer/Passes/IfTransformStringsToEnumPass.cpp
@@ -1,12 +1,306 @@
 #include <Analyzer/Passes/IfTransformStringsToEnumPass.h>
-#include "Analyzer/IQueryTreeNode.h"
-#include "Interpreters/Context_fwd.h"
+
+#include <Analyzer/ConstantNode.h>
+#include <Analyzer/FunctionNode.h>
+#include <Analyzer/IQueryTreeNode.h>
+#include <Analyzer/InDepthQueryTreeVisitor.h>
+
+#include <DataTypes/DataTypeArray.h>
+#include <DataTypes/DataTypeEnum.h>
+#include <DataTypes/DataTypeString.h>
+#include <DataTypes/IDataType.h>
+
+#include <Functions/FunctionFactory.h>
+
+#include <fmt/format.h>

 namespace DB
 {

+namespace
+{
+
+/// Visitor for finding functions that are used inside another function
+class FindUsedFunctionsVisitor : public ConstInDepthQueryTreeVisitor<FindUsedFunctionsVisitor>
+{
+public:
+    FindUsedFunctionsVisitor(
+        std::unordered_set<std::string> & used_functions_,
+        const std::unordered_set<std::string> & function_names_,
+        size_t stack_size_)
+        : used_functions(used_functions_), function_names(function_names_), stack_size(stack_size_)
+    {
+    }
+
+    bool needChildVisit(VisitQueryTreeNodeType & parent, VisitQueryTreeNodeType & /* child */)
+    {
+        return parent->getNodeType() != QueryTreeNodeType::FUNCTION;
+    }
+
+    void visitImpl(const QueryTreeNodePtr & node)
+    {
+        auto * function_node = node->as<FunctionNode>();
+
+        if (!function_node)
+            return;
+
+        const auto & function_name = function_node->getFunctionName();
+        if (function_names.contains(function_name) && stack_size > 0)
+        {
+            const auto & alias = function_node->getAlias();
+
+            if (!alias.empty())
+                used_functions.insert(alias);
+        }
+
+        FindUsedFunctionsVisitor visitor(used_functions, function_names, stack_size + 1);
+        visitor.visit(function_node->getArgumentsNode());
+    }
+
+private:
+    /// we store only function aliases which are not modified, including the nodes owning it
+    /// we will modify only argument nodes
+    std::unordered_set<std::string> & used_functions;
+    const std::unordered_set<std::string> & function_names;
+    size_t stack_size;
+};
+
+template <typename EnumType>
+std::string makeStringsEnum(const EnumType & enum_type)
+{
+    std::string enum_string;
+
+    if constexpr (std::same_as<EnumType, DataTypeEnum16>)
+    {
+        enum_string = "Enum16(";
+    }
+    else
+    {
+        static_assert(std::same_as<EnumType, DataTypeEnum8>, "Invalid field type for enum");
+        enum_string = "Enum8(";
+    }
+
+    const auto & values = enum_type.getValues();
+    for (const auto & [value_string, value_index] : values)
+    {
+        enum_string += "\'" + value_string + "\' = " + std::to_string(value_index);
+
+        assert(value_index > 0);
+        if (static_cast<size_t>(value_index) < values.size())
+            enum_string += ", ";
+    }
+
+    enum_string += ")";
+
+    return enum_string;
+}
+
+/// We place strings in ascending order here under the assumption it colud speed up String to Enum conversion.
+template <typename EnumType>
+auto getDataEnumType(const std::set<std::string> & string_values)
+{
+    using EnumValues = typename EnumType::Values;
+    EnumValues enum_values;
+    enum_values.reserve(string_values.size());
+
+    size_t number = 1;
+    for (const auto & value : string_values)
+        enum_values.emplace_back(value, number++);
+
+    return std::make_shared<EnumType>(std::move(enum_values));
+}
+
+std::pair<DataTypePtr, std::string> getEnumTypeAndString(const std::set<std::string> & string_values)
+{
+    DataTypePtr result_type = nullptr;
+    std::string enum_string;
+    if (string_values.size() >= 255)
+    {
+        auto enum_type = getDataEnumType<DataTypeEnum16>(string_values);
+        enum_string = makeStringsEnum(*enum_type);
+        result_type = std::move(enum_type);
+    }
+    else
+    {
+        auto enum_type = getDataEnumType<DataTypeEnum16>(string_values);
+        enum_string = makeStringsEnum(*enum_type);
+        result_type = std::move(enum_type);
+    }
+
+    return {std::move(result_type), std::move(enum_string)};
+}
+
+void changeIfArguments(
+    QueryTreeNodePtr & first, QueryTreeNodePtr & second, const std::set<std::string> & string_values, const ContextPtr & context)
+{
+    auto [result_type, enum_string] = getEnumTypeAndString(string_values);
+    auto enum_literal = std::make_shared<ConstantValue>(enum_string, std::make_shared<DataTypeString>());
+    auto enum_literal_node = std::make_shared<ConstantNode>(std::move(enum_literal));
+
+    const auto create_cast_function = [&, &result_type = result_type](QueryTreeNodePtr & node)
+    {
+        auto cast_function = FunctionFactory::instance().get("_CAST", context);
+        QueryTreeNodes arguments{node, enum_literal_node};
+
+        auto function_node = std::make_shared<FunctionNode>("_CAST");
+        function_node->resolveAsFunction(std::move(cast_function), result_type);
+        function_node->getArguments().getNodes() = std::move(arguments);
+
+        node = std::move(function_node);
+    };
+
+    create_cast_function(first);
+    create_cast_function(second);
+}
+
+void changeTransformArguments(
+    QueryTreeNodePtr & array_to,
+    QueryTreeNodePtr & default_value,
+    const std::set<std::string> & string_values,
+    const ContextPtr & context)
+{
+    auto [result_type, enum_string] = getEnumTypeAndString(string_values);
+
+    {
+        auto enum_literal = std::make_shared<ConstantValue>(fmt::format("Array({})", enum_string), std::make_shared<DataTypeString>());
+        auto enum_literal_node = std::make_shared<ConstantNode>(std::move(enum_literal));
+
+        auto cast_function = FunctionFactory::instance().get("_CAST", context);
+        QueryTreeNodes arguments{array_to, enum_literal_node};
+
+        auto function_node = std::make_shared<FunctionNode>("_CAST");
+        function_node->resolveAsFunction(std::move(cast_function), std::make_shared<DataTypeArray>(result_type));
+        function_node->getArguments().getNodes() = std::move(arguments);
+
+        array_to = std::move(function_node);
+    }
+
+    {
+        auto enum_literal = std::make_shared<ConstantValue>(enum_string, std::make_shared<DataTypeString>());
+        auto enum_literal_node = std::make_shared<ConstantNode>(std::move(enum_literal));
+
+        auto cast_function = FunctionFactory::instance().get("_CAST", context);
+        QueryTreeNodes arguments{default_value, enum_literal_node};
+
+        auto function_node = std::make_shared<FunctionNode>("_CAST");
+        function_node->resolveAsFunction(std::move(cast_function), result_type);
+        function_node->getArguments().getNodes() = std::move(arguments);
+
+        default_value = std::move(function_node);
+    }
+}
+
+class ConvertStringsToEnumVisitor : public InDepthQueryTreeVisitor<ConvertStringsToEnumVisitor>
+{
+public:
+    explicit ConvertStringsToEnumVisitor(std::unordered_set<std::string> used_functions_, ContextPtr context_)
+        : used_functions(std::move(used_functions_)), context(std::move(context_))
+    {
+    }
+
+    bool needChildVisit(VisitQueryTreeNodeType & parent, VisitQueryTreeNodeType & /* child */)
+    {
+        return parent->getNodeType() != QueryTreeNodeType::FUNCTION;
+    }
+
+    void
visitImpl(QueryTreeNodePtr & node)
+    {
+        auto * function_node = node->as<FunctionNode>();
+
+        if (!function_node)
+            return;
+
+        /// we cannot change the type of its result because it's used
+        /// as argument in another function
+        if (used_functions.contains(function_node->getAlias()))
+            return;
+
+        std::string_view function_name = function_node->getFunctionName();
+        if (function_name == "if")
+        {
+            auto & argument_nodes = function_node->getArguments().getNodes();
+
+            if (argument_nodes.size() != 3)
+                return;
+
+            const auto * first_literal = argument_nodes[1]->as<ConstantNode>();
+            const auto * second_literal = argument_nodes[2]->as<ConstantNode>();
+
+            if (!first_literal || !second_literal)
+                return;
+
+            if (!WhichDataType(first_literal->getResultType()).isString() || !WhichDataType(second_literal->getResultType()).isString())
+                return;
+
+            std::set<std::string> string_values;
+            string_values.insert(first_literal->getValue().get<std::string>());
+            string_values.insert(second_literal->getValue().get<std::string>());
+
+            changeIfArguments(argument_nodes[1], argument_nodes[2], string_values, context);
+            return;
+        }
+
+        if (function_name == "transform")
+        {
+            auto & argument_nodes = function_node->getArguments().getNodes();
+
+            if (argument_nodes.size() != 4)
+                return;
+
+            const auto * literal_to = argument_nodes[2]->as<ConstantNode>();
+            const auto * literal_default = argument_nodes[3]->as<ConstantNode>();
+
+            if (!literal_to || !literal_default)
+                return;
+
+            if (!WhichDataType(literal_to->getResultType()).isArray() || !WhichDataType(literal_default->getResultType()).isString())
+                return;
+
+            auto array_to = literal_to->getValue().get<Array>();
+
+            if (array_to.empty())
+                return;
+
+            if (!std::all_of(
+                    array_to.begin(),
+                    array_to.end(),
+                    [](const auto & field) { return field.getType() == Field::Types::Which::String; }))
+                return;
+
+            /// collect possible string values
+            std::set<std::string> string_values;
+
+            for (const auto & value : array_to)
+                string_values.insert(value.get<std::string>());
+
+            string_values.insert(literal_default->getValue().get<std::string>());
+
+            changeTransformArguments(argument_nodes[2], argument_nodes[3], string_values, context);
+            return;
+        }
+    }
+
+private:
+    std::unordered_set<std::string> used_functions;
+    ContextPtr context;
+};
+
+}
+
-void IfTransformStringsToEnumPass::run(QueryTreeNodePtr, ContextPtr)
+void IfTransformStringsToEnumPass::run(QueryTreeNodePtr query, ContextPtr context)
 {
+    std::unordered_set<std::string> used_functions;
+    std::unordered_set<std::string> function_names{"if", "transform"};
+
+    {
+        FindUsedFunctionsVisitor visitor(used_functions, function_names, 0);
+        visitor.visit(query);
+    }
+
+    {
+        ConvertStringsToEnumVisitor visitor(used_functions, context);
+        visitor.visit(query);
+    }
 }

 }
diff --git a/src/Analyzer/QueryTreePassManager.cpp b/src/Analyzer/QueryTreePassManager.cpp
index 7c5bc1a48d8..7c5550c2545 100644
--- a/src/Analyzer/QueryTreePassManager.cpp
+++ b/src/Analyzer/QueryTreePassManager.cpp
@@ -14,6 +14,7 @@
 #include
 #include
 #include
+#include <Analyzer/Passes/IfTransformStringsToEnumPass.h>
 #include
 #include
@@ -22,6 +23,7 @@
 #include
 #include
 #include
+#include "Analyzer/Passes/IfTransformStringsToEnumPass.h"

 namespace DB
 {
@@ -77,7 +79,6 @@ public:
  * TODO: Support setting optimize_duplicate_order_by_and_distinct.
  * TODO: Support setting optimize_redundant_functions_in_order_by.
  * TODO: Support setting optimize_monotonous_functions_in_order_by.
- * TODO: Support setting optimize_if_transform_strings_to_enum.
  * TODO: Support settings.optimize_or_like_chain.
  * TODO: Add optimizations based on function semantics. Example: SELECT * FROM test_table WHERE id != id. (id is not nullable column).
 */
@@ -193,6 +194,9 @@ void addQueryTreePasses(QueryTreePassManager & manager)

     if (settings.optimize_syntax_fuse_functions)
         manager.addPass(std::make_unique<FuseFunctionsPass>());
+
+    if (settings.optimize_if_transform_strings_to_enum)
+        manager.addPass(std::make_unique<IfTransformStringsToEnumPass>());
 }

 }
diff --git a/src/Parsers/parseQuery.cpp b/src/Parsers/parseQuery.cpp
index 4a0c60da48d..56ce3b78936 100644
--- a/src/Parsers/parseQuery.cpp
+++ b/src/Parsers/parseQuery.cpp
@@ -138,6 +138,7 @@ void writeCommonErrorMessage(
     if (!query_description.empty())
         out << " (" << query_description << ")";

+    out << std::string(begin, end - begin) << '\n';
     out << ": failed at position " << (last_token.begin - begin + 1);

     if (last_token.type == TokenType::EndOfStream || last_token.type == TokenType::Semicolon)
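At this point the new pass is registered in QueryTreePassManager behind the optimize_if_transform_strings_to_enum setting. The effect on a query is the rewrite sketched below (an illustrative sketch, not output copied from this commit: this first revision still generates Enum16 even for small value sets, which the "Small fixes" commit further down corrects to Enum8). Both string branches are cast to a single generated Enum type whose values are numbered in ascending string order:

    SET allow_experimental_analyzer = 1;
    SET optimize_if_transform_strings_to_enum = 1;

    -- if(number > 5, 'censor.net', 'google') is effectively rewritten to:
    SELECT if(number > 5,
        _CAST('censor.net', 'Enum8(\'censor.net\' = 1, \'google\' = 2)'),
        _CAST('google', 'Enum8(\'censor.net\' = 1, \'google\' = 2)'))
    FROM system.numbers
    LIMIT 10;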
From b89dff3b4587412d58d551520b2886ce49576bd6 Mon Sep 17 00:00:00 2001
From: Antonio Andelic
Date: Wed, 7 Dec 2022 11:44:23 +0000
Subject: [PATCH 03/35] Add tests

---
 src/Parsers/parseQuery.cpp                    |   1 -
 ...324_if_transform_strings_to_enum.reference |   2 +-
 ...497_if_transform_strings_to_enum.reference | 174 ++++++++++++++++++
 .../02497_if_transform_strings_to_enum.sql    |  24 +++
 4 files changed, 199 insertions(+), 2 deletions(-)
 create mode 100644 tests/queries/0_stateless/02497_if_transform_strings_to_enum.reference
 create mode 100644 tests/queries/0_stateless/02497_if_transform_strings_to_enum.sql

diff --git a/src/Parsers/parseQuery.cpp b/src/Parsers/parseQuery.cpp
index 56ce3b78936..4a0c60da48d 100644
--- a/src/Parsers/parseQuery.cpp
+++ b/src/Parsers/parseQuery.cpp
@@ -138,7 +138,6 @@ void writeCommonErrorMessage(
     if (!query_description.empty())
         out << " (" << query_description << ")";

-    out << std::string(begin, end - begin) << '\n';
     out << ": failed at position " << (last_token.begin - begin + 1);

     if (last_token.type == TokenType::EndOfStream || last_token.type == TokenType::Semicolon)
diff --git a/tests/queries/0_stateless/01324_if_transform_strings_to_enum.reference b/tests/queries/0_stateless/01324_if_transform_strings_to_enum.reference
index 7cf545176e3..4bb0a9b8323 100644
--- a/tests/queries/0_stateless/01324_if_transform_strings_to_enum.reference
+++ b/tests/queries/0_stateless/01324_if_transform_strings_to_enum.reference
@@ -21,7 +21,7 @@ censor.net
 censor.net
 censor.net
 censor.net
-SELECT if(number > 5, \'censor.net\', \'google\')
+SELECT if(number > 5, _CAST(\'censor.net\', \'Enum8(\\\'censor.net\\\' = 1, \\\'google\\\' = 2)\'), _CAST(\'google\', \'Enum8(\\\'censor.net\\\' = 1, \\\'google\\\' = 2)\'))
 FROM system.numbers
 LIMIT 10
 other
diff --git a/tests/queries/0_stateless/02497_if_transform_strings_to_enum.reference b/tests/queries/0_stateless/02497_if_transform_strings_to_enum.reference
new file mode 100644
index 00000000000..37a220c7cea
--- /dev/null
+++ b/tests/queries/0_stateless/02497_if_transform_strings_to_enum.reference
@@ -0,0 +1,174 @@
+other
+other
+google
+other
+censor.net
+other
+yahoo
+other
+other
+other
+SELECT transform(number, [2, 4, 6], _CAST([\'google\', \'censor.net\', \'yahoo\'], \'Array(Enum8(\\\'censor.net\\\' = 1, \\\'google\\\' = 2, \\\'other\\\' = 3, \\\'yahoo\\\' = 4))\'), _CAST(\'other\', \'Enum8(\\\'censor.net\\\' = 1, \\\'google\\\' = 2, \\\'other\\\' = 3, \\\'yahoo\\\' = 4)\'))
+FROM system.numbers
+LIMIT 10
+QUERY id: 0
+  PROJECTION COLUMNS
+    transform(number, [2, 4, 6], [\'google\', \'censor.net\', \'yahoo\'], \'other\') String
+  PROJECTION
+    LIST id: 1, nodes: 1
+      FUNCTION id: 2, function_name: transform, function_type: ordinary, result_type: String
+        ARGUMENTS
+          LIST id: 3, nodes: 4
+            COLUMN id: 
4, column_name: number, result_type: UInt64, source_id: 5 + CONSTANT id: 6, constant_value: Array_[UInt64_2, UInt64_4, UInt64_6], constant_value_type: Array(UInt8) + FUNCTION id: 7, function_name: _CAST, function_type: ordinary, result_type: Array(Enum16(\'censor.net\' = 1, \'google\' = 2, \'other\' = 3, \'yahoo\' = 4)) + ARGUMENTS + LIST id: 8, nodes: 2 + CONSTANT id: 9, constant_value: Array_[\'google\', \'censor.net\', \'yahoo\'], constant_value_type: Array(String) + CONSTANT id: 10, constant_value: \'Array(Enum16(\\\'censor.net\\\' = 1, \\\'google\\\' = 2, \\\'other\\\' = 3, \\\'yahoo\\\' = 4))\', constant_value_type: String + FUNCTION id: 11, function_name: _CAST, function_type: ordinary, result_type: Enum16(\'censor.net\' = 1, \'google\' = 2, \'other\' = 3, \'yahoo\' = 4) + ARGUMENTS + LIST id: 12, nodes: 2 + CONSTANT id: 13, constant_value: \'other\', constant_value_type: String + CONSTANT id: 14, constant_value: \'Enum16(\\\'censor.net\\\' = 1, \\\'google\\\' = 2, \\\'other\\\' = 3, \\\'yahoo\\\' = 4)\', constant_value_type: String + JOIN TREE + TABLE id: 5, table_name: system.numbers + LIMIT + CONSTANT id: 15, constant_value: UInt64_10, constant_value_type: UInt8 +google +google +google +google +google +google +censor.net +censor.net +censor.net +censor.net +SELECT if(number > 5, _CAST(\'censor.net\', \'Enum8(\\\'censor.net\\\' = 1, \\\'google\\\' = 2)\'), _CAST(\'google\', \'Enum8(\\\'censor.net\\\' = 1, \\\'google\\\' = 2)\')) +FROM system.numbers +LIMIT 10 +QUERY id: 0 + PROJECTION COLUMNS + if(greater(number, 5), \'censor.net\', \'google\') String + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: if, function_type: ordinary, result_type: String + ARGUMENTS + LIST id: 3, nodes: 3 + FUNCTION id: 4, function_name: greater, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 5, nodes: 2 + COLUMN id: 6, column_name: number, result_type: UInt64, source_id: 7 + CONSTANT id: 8, constant_value: UInt64_5, constant_value_type: UInt8 + FUNCTION id: 9, function_name: _CAST, function_type: ordinary, result_type: Enum16(\'censor.net\' = 1, \'google\' = 2) + ARGUMENTS + LIST id: 10, nodes: 2 + CONSTANT id: 11, constant_value: \'censor.net\', constant_value_type: String + CONSTANT id: 12, constant_value: \'Enum16(\\\'censor.net\\\' = 1, \\\'google\\\' = 2)\', constant_value_type: String + FUNCTION id: 13, function_name: _CAST, function_type: ordinary, result_type: Enum16(\'censor.net\' = 1, \'google\' = 2) + ARGUMENTS + LIST id: 14, nodes: 2 + CONSTANT id: 15, constant_value: \'google\', constant_value_type: String + CONSTANT id: 12, constant_value: \'Enum16(\\\'censor.net\\\' = 1, \\\'google\\\' = 2)\', constant_value_type: String + JOIN TREE + TABLE id: 7, table_name: system.numbers + LIMIT + CONSTANT id: 16, constant_value: UInt64_10, constant_value_type: UInt8 +21 +21 +21 +21 +21 +21 +20 +20 +20 +20 +SELECT CAST(if(number > 5, \'20\', \'21\'), \'Int8\') +FROM system.numbers +LIMIT 10 +QUERY id: 0 + PROJECTION COLUMNS + CAST(if(greater(number, 5), \'20\', \'21\'), \'Int8\') Int8 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: CAST, function_type: ordinary, result_type: Int8 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: if, function_type: ordinary, result_type: String + ARGUMENTS + LIST id: 5, nodes: 3 + FUNCTION id: 6, function_name: greater, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 7, nodes: 2 + COLUMN id: 8, column_name: number, result_type: UInt64, source_id: 9 + CONSTANT id: 10, 
constant_value: UInt64_5, constant_value_type: UInt8 + CONSTANT id: 11, constant_value: \'20\', constant_value_type: String + CONSTANT id: 12, constant_value: \'21\', constant_value_type: String + CONSTANT id: 13, constant_value: \'Int8\', constant_value_type: String + JOIN TREE + TABLE id: 9, table_name: system.numbers + LIMIT + CONSTANT id: 14, constant_value: UInt64_10, constant_value_type: UInt8 +other +other +google +other +censor.net +other +yahoo +other +other +other +SELECT transform(number, [2, 4, 6], [\'google\', \'censor.net\', \'yahoo\'], \'other\') +FROM system.numbers +LIMIT 10 +QUERY id: 0 + PROJECTION COLUMNS + transform(number, [2, 4, 6], [\'google\', \'censor.net\', \'yahoo\'], \'other\') String + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: transform, function_type: ordinary, result_type: String + ARGUMENTS + LIST id: 3, nodes: 4 + COLUMN id: 4, column_name: number, result_type: UInt64, source_id: 5 + CONSTANT id: 6, constant_value: Array_[UInt64_2, UInt64_4, UInt64_6], constant_value_type: Array(UInt8) + CONSTANT id: 7, constant_value: Array_[\'google\', \'censor.net\', \'yahoo\'], constant_value_type: Array(String) + CONSTANT id: 8, constant_value: \'other\', constant_value_type: String + JOIN TREE + TABLE id: 5, table_name: system.numbers + LIMIT + CONSTANT id: 9, constant_value: UInt64_10, constant_value_type: UInt8 +google +google +google +google +google +google +censor.net +censor.net +censor.net +censor.net +SELECT if(number > 5, \'censor.net\', \'google\') +FROM system.numbers +LIMIT 10 +QUERY id: 0 + PROJECTION COLUMNS + if(greater(number, 5), \'censor.net\', \'google\') String + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: if, function_type: ordinary, result_type: String + ARGUMENTS + LIST id: 3, nodes: 3 + FUNCTION id: 4, function_name: greater, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 5, nodes: 2 + COLUMN id: 6, column_name: number, result_type: UInt64, source_id: 7 + CONSTANT id: 8, constant_value: UInt64_5, constant_value_type: UInt8 + CONSTANT id: 9, constant_value: \'censor.net\', constant_value_type: String + CONSTANT id: 10, constant_value: \'google\', constant_value_type: String + JOIN TREE + TABLE id: 7, table_name: system.numbers + LIMIT + CONSTANT id: 11, constant_value: UInt64_10, constant_value_type: UInt8 diff --git a/tests/queries/0_stateless/02497_if_transform_strings_to_enum.sql b/tests/queries/0_stateless/02497_if_transform_strings_to_enum.sql new file mode 100644 index 00000000000..1a10a578146 --- /dev/null +++ b/tests/queries/0_stateless/02497_if_transform_strings_to_enum.sql @@ -0,0 +1,24 @@ +SET allow_experimental_analyzer = 1; +SET optimize_if_transform_strings_to_enum = 1; + +SELECT transform(number, [2, 4, 6], ['google', 'censor.net', 'yahoo'], 'other') FROM system.numbers LIMIT 10; +EXPLAIN SYNTAX SELECT transform(number, [2, 4, 6], ['google', 'censor.net', 'yahoo'], 'other') FROM system.numbers LIMIT 10; +EXPLAIN QUERY TREE run_passes = 1 SELECT transform(number, [2, 4, 6], ['google', 'censor.net', 'yahoo'], 'other') FROM system.numbers LIMIT 10; + +SELECT number > 5 ? 'censor.net' : 'google' FROM system.numbers LIMIT 10; +EXPLAIN SYNTAX SELECT number > 5 ? 'censor.net' : 'google' FROM system.numbers LIMIT 10; +EXPLAIN QUERY TREE run_passes = 1 SELECT number > 5 ? 'censor.net' : 'google' FROM system.numbers LIMIT 10; + +SELECT CAST(number > 5 ? '20' : '21', 'Int8') FROM system.numbers LIMIT 10; +EXPLAIN SYNTAX SELECT CAST(number > 5 ? 
'20' : '21', 'Int8') FROM system.numbers LIMIT 10;
+EXPLAIN QUERY TREE run_passes = 1 SELECT CAST(number > 5 ? '20' : '21', 'Int8') FROM system.numbers LIMIT 10;
+
+SET optimize_if_transform_strings_to_enum = 0;
+
+SELECT transform(number, [2, 4, 6], ['google', 'censor.net', 'yahoo'], 'other') FROM system.numbers LIMIT 10;
+EXPLAIN SYNTAX SELECT transform(number, [2, 4, 6], ['google', 'censor.net', 'yahoo'], 'other') FROM system.numbers LIMIT 10;
+EXPLAIN QUERY TREE run_passes = 1 SELECT transform(number, [2, 4, 6], ['google', 'censor.net', 'yahoo'], 'other') FROM system.numbers LIMIT 10;
+
+SELECT number > 5 ? 'censor.net' : 'google' FROM system.numbers LIMIT 10;
+EXPLAIN SYNTAX SELECT number > 5 ? 'censor.net' : 'google' FROM system.numbers LIMIT 10;
+EXPLAIN QUERY TREE run_passes = 1 SELECT number > 5 ? 'censor.net' : 'google' FROM system.numbers LIMIT 10;

From 41e8426f27d3eb199e3d1b98cfbb401fd5992367 Mon Sep 17 00:00:00 2001
From: Antonio Andelic
Date: Wed, 7 Dec 2022 11:59:55 +0000
Subject: [PATCH 04/35] Small fixes

---
 .../Passes/IfTransformStringsToEnumPass.cpp   |  2 +-
 src/Analyzer/QueryTreePassManager.cpp         |  1 -
 ...497_if_transform_strings_to_enum.reference | 16 ++++++++--------
 3 files changed, 9 insertions(+), 10 deletions(-)

diff --git a/src/Analyzer/Passes/IfTransformStringsToEnumPass.cpp b/src/Analyzer/Passes/IfTransformStringsToEnumPass.cpp
index 51defd8de1f..86db814ea33 100644
--- a/src/Analyzer/Passes/IfTransformStringsToEnumPass.cpp
+++ b/src/Analyzer/Passes/IfTransformStringsToEnumPass.cpp
@@ -122,7 +122,7 @@ std::pair<DataTypePtr, std::string> getEnumTypeAndString(const std::set<std::string> & string_values)
     }
     else
     {
-        auto enum_type = getDataEnumType<DataTypeEnum16>(string_values);
+        auto enum_type = getDataEnumType<DataTypeEnum8>(string_values);
         enum_string = makeStringsEnum(*enum_type);
         result_type = std::move(enum_type);
     }
diff --git a/src/Analyzer/QueryTreePassManager.cpp b/src/Analyzer/QueryTreePassManager.cpp
index 7c5550c2545..ca9d4e3d1e3 100644
--- a/src/Analyzer/QueryTreePassManager.cpp
+++ b/src/Analyzer/QueryTreePassManager.cpp
@@ -23,7 +23,6 @@
 #include
 #include
 #include
-#include "Analyzer/Passes/IfTransformStringsToEnumPass.h"

 namespace DB
 {
diff --git a/tests/queries/0_stateless/02497_if_transform_strings_to_enum.reference b/tests/queries/0_stateless/02497_if_transform_strings_to_enum.reference
index 37a220c7cea..ed514d478f5 100644
--- a/tests/queries/0_stateless/02497_if_transform_strings_to_enum.reference
+++ b/tests/queries/0_stateless/02497_if_transform_strings_to_enum.reference
@@ -21,16 +21,16 @@ QUERY id: 0
          LIST id: 3, nodes: 4
             COLUMN id: 4, column_name: number, result_type: UInt64, source_id: 5
             CONSTANT id: 6, constant_value: Array_[UInt64_2, UInt64_4, UInt64_6], constant_value_type: Array(UInt8)
-            FUNCTION id: 7, function_name: _CAST, function_type: ordinary, result_type: Array(Enum16(\'censor.net\' = 1, \'google\' = 2, \'other\' = 3, \'yahoo\' = 4))
+            FUNCTION id: 7, function_name: _CAST, function_type: ordinary, result_type: Array(Enum8(\'censor.net\' = 1, \'google\' = 2, \'other\' = 3, \'yahoo\' = 4))
               ARGUMENTS
                 LIST id: 8, nodes: 2
                   CONSTANT id: 9, constant_value: Array_[\'google\', \'censor.net\', \'yahoo\'], constant_value_type: Array(String)
-                  CONSTANT id: 10, constant_value: \'Array(Enum16(\\\'censor.net\\\' = 1, \\\'google\\\' = 2, \\\'other\\\' = 3, \\\'yahoo\\\' = 4))\', constant_value_type: String
+                  CONSTANT id: 10, constant_value: \'Array(Enum8(\\\'censor.net\\\' = 1, \\\'google\\\' = 2, \\\'other\\\' = 3, 
\\\'yahoo\\\' = 4))\', constant_value_type: String + FUNCTION id: 11, function_name: _CAST, function_type: ordinary, result_type: Enum8(\'censor.net\' = 1, \'google\' = 2, \'other\' = 3, \'yahoo\' = 4) ARGUMENTS LIST id: 12, nodes: 2 CONSTANT id: 13, constant_value: \'other\', constant_value_type: String - CONSTANT id: 14, constant_value: \'Enum16(\\\'censor.net\\\' = 1, \\\'google\\\' = 2, \\\'other\\\' = 3, \\\'yahoo\\\' = 4)\', constant_value_type: String + CONSTANT id: 14, constant_value: \'Enum8(\\\'censor.net\\\' = 1, \\\'google\\\' = 2, \\\'other\\\' = 3, \\\'yahoo\\\' = 4)\', constant_value_type: String JOIN TREE TABLE id: 5, table_name: system.numbers LIMIT @@ -61,16 +61,16 @@ QUERY id: 0 LIST id: 5, nodes: 2 COLUMN id: 6, column_name: number, result_type: UInt64, source_id: 7 CONSTANT id: 8, constant_value: UInt64_5, constant_value_type: UInt8 - FUNCTION id: 9, function_name: _CAST, function_type: ordinary, result_type: Enum16(\'censor.net\' = 1, \'google\' = 2) + FUNCTION id: 9, function_name: _CAST, function_type: ordinary, result_type: Enum8(\'censor.net\' = 1, \'google\' = 2) ARGUMENTS LIST id: 10, nodes: 2 CONSTANT id: 11, constant_value: \'censor.net\', constant_value_type: String - CONSTANT id: 12, constant_value: \'Enum16(\\\'censor.net\\\' = 1, \\\'google\\\' = 2)\', constant_value_type: String - FUNCTION id: 13, function_name: _CAST, function_type: ordinary, result_type: Enum16(\'censor.net\' = 1, \'google\' = 2) + CONSTANT id: 12, constant_value: \'Enum8(\\\'censor.net\\\' = 1, \\\'google\\\' = 2)\', constant_value_type: String + FUNCTION id: 13, function_name: _CAST, function_type: ordinary, result_type: Enum8(\'censor.net\' = 1, \'google\' = 2) ARGUMENTS LIST id: 14, nodes: 2 CONSTANT id: 15, constant_value: \'google\', constant_value_type: String - CONSTANT id: 12, constant_value: \'Enum16(\\\'censor.net\\\' = 1, \\\'google\\\' = 2)\', constant_value_type: String + CONSTANT id: 12, constant_value: \'Enum8(\\\'censor.net\\\' = 1, \\\'google\\\' = 2)\', constant_value_type: String JOIN TREE TABLE id: 7, table_name: system.numbers LIMIT From 6d41abf2c70d7fdb4a6029cbf09d295d6a5633f9 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Wed, 7 Dec 2022 13:22:01 +0000 Subject: [PATCH 05/35] Fix clang-tidy error --- src/Analyzer/Passes/IfTransformStringsToEnumPass.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Analyzer/Passes/IfTransformStringsToEnumPass.cpp b/src/Analyzer/Passes/IfTransformStringsToEnumPass.cpp index 86db814ea33..d006288c337 100644 --- a/src/Analyzer/Passes/IfTransformStringsToEnumPass.cpp +++ b/src/Analyzer/Passes/IfTransformStringsToEnumPass.cpp @@ -32,7 +32,7 @@ public: { } - bool needChildVisit(VisitQueryTreeNodeType & parent, VisitQueryTreeNodeType & /* child */) + static bool needChildVisit(VisitQueryTreeNodeType & parent, VisitQueryTreeNodeType & /* child */) { return parent->getNodeType() != QueryTreeNodeType::FUNCTION; } @@ -198,7 +198,7 @@ public: { } - bool needChildVisit(VisitQueryTreeNodeType & parent, VisitQueryTreeNodeType & /* child */) + static bool needChildVisit(VisitQueryTreeNodeType & parent, VisitQueryTreeNodeType & /* child */) { return parent->getNodeType() != QueryTreeNodeType::FUNCTION; } From 905e59f6404fb01e713cb7925ee102cb5f5bfae8 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Thu, 8 Dec 2022 08:45:11 +0000 Subject: [PATCH 06/35] Add example --- .../Passes/IfTransformStringsToEnumPass.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git 
a/src/Analyzer/Passes/IfTransformStringsToEnumPass.h b/src/Analyzer/Passes/IfTransformStringsToEnumPass.h index 03456bbbb2e..a4a014967e0 100644 --- a/src/Analyzer/Passes/IfTransformStringsToEnumPass.h +++ b/src/Analyzer/Passes/IfTransformStringsToEnumPass.h @@ -7,6 +7,24 @@ namespace DB /** * This pass replaces string-type arguments in If and Transform to enum. + * + * E.g. + * ------------------------------- + * SELECT if(number > 5, 'a', 'b') + * FROM system.numbers; + * + * will be transformed into + * + * SELECT if(number > 5, _CAST('a', 'Enum8(\'a\' = 1, \'b\' = 2)'), _CAST('b', 'Enum8(\'a\' = 1, \'b\' = 2)')) + * FROM system.numbers; + * ------------------------------- + * SELECT transform(number, [2, 4], ['a', 'b'], 'c') FROM system.numbers; + * + * will be transformed into + * + * SELECT transform(number, [2, 4], _CAST(['a', 'b'], 'Array(Enum8(\'a\' = 1, \'b\' = 2, \'c\' = 3)'), _CAST('c', 'Enum8(\'a\' = 1, \'b\' = 2, \'c\' = 3)')) + * FROM system.numbers; + * ------------------------------- */ class IfTransformStringsToEnumPass final : public IQueryTreePass { From a6359f6f6da511c1945318f787e33eeabcdd558b Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Thu, 8 Dec 2022 09:50:10 +0000 Subject: [PATCH 07/35] Review fixes --- .../Passes/IfTransformStringsToEnumPass.cpp | 140 +++++------------- 1 file changed, 33 insertions(+), 107 deletions(-) diff --git a/src/Analyzer/Passes/IfTransformStringsToEnumPass.cpp b/src/Analyzer/Passes/IfTransformStringsToEnumPass.cpp index d006288c337..470fb155b5b 100644 --- a/src/Analyzer/Passes/IfTransformStringsToEnumPass.cpp +++ b/src/Analyzer/Passes/IfTransformStringsToEnumPass.cpp @@ -12,8 +12,6 @@ #include -#include - namespace DB { @@ -25,7 +23,7 @@ class FindUsedFunctionsVisitor : public ConstInDepthQueryTreeVisitor & used_functions_, + std::unordered_set & used_functions_, const std::unordered_set & function_names_, size_t stack_size_) : used_functions(used_functions_), function_names(function_names_), stack_size(stack_size_) @@ -46,56 +44,20 @@ public: const auto & function_name = function_node->getFunctionName(); if (function_names.contains(function_name) && stack_size > 0) - { - const auto & alias = function_node->getAlias(); - - if (!alias.empty()) - used_functions.insert(alias); - } + used_functions.insert(function_node); FindUsedFunctionsVisitor visitor(used_functions, function_names, stack_size + 1); visitor.visit(function_node->getArgumentsNode()); } private: - /// we store only function aliases which are not modified, including the nodes owning it - /// we will modify only argument nodes - std::unordered_set & used_functions; + /// we store only function pointers because these nodes won't be modified + std::unordered_set & used_functions; const std::unordered_set & function_names; size_t stack_size; }; -template -std::string makeStringsEnum(const EnumType & enum_type) -{ - std::string enum_string; - - if constexpr (std::same_as) - { - enum_string = "Enum16("; - } - else - { - static_assert(std::same_as, "Invalid field type for enum"); - enum_string = "Enum8("; - } - - const auto & values = enum_type.getValues(); - for (const auto & [value_string, value_index] : values) - { - enum_string += "\'" + value_string + "\' = " + std::to_string(value_index); - - assert(value_index > 0); - if (static_cast(value_index) < values.size()) - enum_string += ", "; - } - - enum_string += ")"; - - return enum_string; -} - -/// We place strings in ascending order here under the assumption it colud speed up String to Enum conversion. 
+/// We place strings in ascending order here under the assumption it could speed up String to Enum conversion. template auto getDataEnumType(const std::set & string_values) { @@ -110,47 +72,36 @@ auto getDataEnumType(const std::set & string_values) return std::make_shared(std::move(enum_values)); } -std::pair getEnumTypeAndString(const std::set & string_values) +DataTypePtr getEnumType(const std::set & string_values) { - DataTypePtr result_type = nullptr; - std::string enum_string; if (string_values.size() >= 255) - { - auto enum_type = getDataEnumType(string_values); - enum_string = makeStringsEnum(*enum_type); - result_type = std::move(enum_type); - } + return getDataEnumType(string_values); else - { - auto enum_type = getDataEnumType(string_values); - enum_string = makeStringsEnum(*enum_type); - result_type = std::move(enum_type); - } + return getDataEnumType(string_values); +} - return {std::move(result_type), std::move(enum_string)}; +QueryTreeNodePtr createCastFunction(QueryTreeNodePtr from, DataTypePtr result_type, ContextPtr context) +{ + auto enum_literal = std::make_shared(result_type->getName(), std::make_shared()); + auto enum_literal_node = std::make_shared(std::move(enum_literal)); + + auto cast_function = FunctionFactory::instance().get("_CAST", std::move(context)); + QueryTreeNodes arguments{std::move(from), std::move(enum_literal_node)}; + + auto function_node = std::make_shared("_CAST"); + function_node->resolveAsFunction(std::move(cast_function), std::move(result_type)); + function_node->getArguments().getNodes() = std::move(arguments); + + return function_node; } void changeIfArguments( QueryTreeNodePtr & first, QueryTreeNodePtr & second, const std::set & string_values, const ContextPtr & context) { - auto [result_type, enum_string] = getEnumTypeAndString(string_values); - auto enum_literal = std::make_shared(enum_string, std::make_shared()); - auto enum_literal_node = std::make_shared(std::move(enum_literal)); + auto result_type = getEnumType(string_values); - const auto create_cast_function = [&, &result_type = result_type](QueryTreeNodePtr & node) - { - auto cast_function = FunctionFactory::instance().get("_CAST", context); - QueryTreeNodes arguments{node, enum_literal_node}; - - auto function_node = std::make_shared("_CAST"); - function_node->resolveAsFunction(std::move(cast_function), result_type); - function_node->getArguments().getNodes() = std::move(arguments); - - node = std::move(function_node); - }; - - create_cast_function(first); - create_cast_function(second); + first = createCastFunction(first, result_type, context); + second = createCastFunction(second, result_type, context); } void changeTransformArguments( @@ -159,41 +110,16 @@ void changeTransformArguments( const std::set & string_values, const ContextPtr & context) { - auto [result_type, enum_string] = getEnumTypeAndString(string_values); + auto result_type = getEnumType(string_values); - { - auto enum_literal = std::make_shared(fmt::format("Array({})", enum_string), std::make_shared()); - auto enum_literal_node = std::make_shared(std::move(enum_literal)); - - auto cast_function = FunctionFactory::instance().get("_CAST", context); - QueryTreeNodes arguments{array_to, enum_literal_node}; - - auto function_node = std::make_shared("_CAST"); - function_node->resolveAsFunction(std::move(cast_function), std::make_shared(result_type)); - function_node->getArguments().getNodes() = std::move(arguments); - - array_to = std::move(function_node); - } - - { - auto enum_literal = std::make_shared(enum_string, 
std::make_shared()); - auto enum_literal_node = std::make_shared(std::move(enum_literal)); - - auto cast_function = FunctionFactory::instance().get("_CAST", context); - QueryTreeNodes arguments{default_value, enum_literal_node}; - - auto function_node = std::make_shared("_CAST"); - function_node->resolveAsFunction(std::move(cast_function), result_type); - function_node->getArguments().getNodes() = std::move(arguments); - - default_value = std::move(function_node); - } + array_to = createCastFunction(array_to, std::make_shared(result_type), context); + default_value = createCastFunction(default_value, std::move(result_type), context); } class ConvertStringsToEnumVisitor : public InDepthQueryTreeVisitor { public: - explicit ConvertStringsToEnumVisitor(std::unordered_set used_functions_, ContextPtr context_) + explicit ConvertStringsToEnumVisitor(std::unordered_set used_functions_, ContextPtr context_) : used_functions(std::move(used_functions_)), context(std::move(context_)) { } @@ -212,7 +138,7 @@ public: /// we cannot change the type of its result because it's used /// as argument in another function - if (used_functions.contains(function_node->getAlias())) + if (used_functions.contains(function_node)) return; std::string_view function_name = function_node->getFunctionName(); @@ -281,7 +207,7 @@ public: } private: - std::unordered_set used_functions; + std::unordered_set used_functions; ContextPtr context; }; @@ -289,7 +215,7 @@ private: void IfTransformStringsToEnumPass::run(QueryTreeNodePtr query, ContextPtr context) { - std::unordered_set used_functions; + std::unordered_set used_functions; std::unordered_set function_names{"if", "transform"}; { @@ -298,7 +224,7 @@ void IfTransformStringsToEnumPass::run(QueryTreeNodePtr query, ContextPtr contex } { - ConvertStringsToEnumVisitor visitor(used_functions, context); + ConvertStringsToEnumVisitor visitor(std::move(used_functions), context); visitor.visit(query); } } From 6121d70fe12d4b080836ae8ff06a96c62941fb27 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Thu, 8 Dec 2022 09:50:19 +0000 Subject: [PATCH 08/35] Update tests --- .../Passes/IfTransformStringsToEnumPass.cpp | 100 ++--- ...497_if_transform_strings_to_enum.reference | 416 +++++++++++++++--- .../02497_if_transform_strings_to_enum.sql | 26 +- 3 files changed, 409 insertions(+), 133 deletions(-) diff --git a/src/Analyzer/Passes/IfTransformStringsToEnumPass.cpp b/src/Analyzer/Passes/IfTransformStringsToEnumPass.cpp index 470fb155b5b..18518bb5f8b 100644 --- a/src/Analyzer/Passes/IfTransformStringsToEnumPass.cpp +++ b/src/Analyzer/Passes/IfTransformStringsToEnumPass.cpp @@ -18,45 +18,6 @@ namespace DB namespace { -/// Visitor for finding functions that are used inside another function -class FindUsedFunctionsVisitor : public ConstInDepthQueryTreeVisitor -{ -public: - FindUsedFunctionsVisitor( - std::unordered_set & used_functions_, - const std::unordered_set & function_names_, - size_t stack_size_) - : used_functions(used_functions_), function_names(function_names_), stack_size(stack_size_) - { - } - - static bool needChildVisit(VisitQueryTreeNodeType & parent, VisitQueryTreeNodeType & /* child */) - { - return parent->getNodeType() != QueryTreeNodeType::FUNCTION; - } - - void visitImpl(const QueryTreeNodePtr & node) - { - auto * function_node = node->as(); - - if (!function_node) - return; - - const auto & function_name = function_node->getFunctionName(); - if (function_names.contains(function_name) && stack_size > 0) - used_functions.insert(function_node); - - 
FindUsedFunctionsVisitor visitor(used_functions, function_names, stack_size + 1); - visitor.visit(function_node->getArgumentsNode()); - } - -private: - /// we store only function pointers because these nodes won't be modified - std::unordered_set & used_functions; - const std::unordered_set & function_names; - size_t stack_size; -}; - /// We place strings in ascending order here under the assumption it could speed up String to Enum conversion. template auto getDataEnumType(const std::set & string_values) @@ -95,6 +56,8 @@ QueryTreeNodePtr createCastFunction(QueryTreeNodePtr from, DataTypePtr result_ty return function_node; } +/// if(arg1, arg2, arg3) will be transformed to if(arg1, _CAST(arg2, Enum...), _CAST(arg3, Enum...)) +/// where Enum is generated based on the possible values stored in string_values void changeIfArguments( QueryTreeNodePtr & first, QueryTreeNodePtr & second, const std::set & string_values, const ContextPtr & context) { @@ -104,6 +67,8 @@ void changeIfArguments( second = createCastFunction(second, result_type, context); } +/// transform(value, array_from, array_to, default_value) will be transformed to transform(value, array_from, _CAST(array_to, Array(Enum...)), _CAST(default_value, Enum...)) +/// where Enum is generated based on the possible values stored in string_values void changeTransformArguments( QueryTreeNodePtr & array_to, QueryTreeNodePtr & default_value, @@ -116,19 +81,25 @@ void changeTransformArguments( default_value = createCastFunction(default_value, std::move(result_type), context); } +void wrapIntoToString(FunctionNode & function_node, QueryTreeNodePtr arg, ContextPtr context) +{ + assert(WhichDataType(function_node.getResultType()).isString()); + + auto to_string_function = FunctionFactory::instance().get("toString", std::move(context)); + QueryTreeNodes arguments{std::move(arg)}; + + function_node.resolveAsFunction(std::move(to_string_function), std::make_shared()); + function_node.getArguments().getNodes() = std::move(arguments); +} + class ConvertStringsToEnumVisitor : public InDepthQueryTreeVisitor { public: - explicit ConvertStringsToEnumVisitor(std::unordered_set used_functions_, ContextPtr context_) - : used_functions(std::move(used_functions_)), context(std::move(context_)) + explicit ConvertStringsToEnumVisitor(ContextPtr context_) + : context(std::move(context_)) { } - static bool needChildVisit(VisitQueryTreeNodeType & parent, VisitQueryTreeNodeType & /* child */) - { - return parent->getNodeType() != QueryTreeNodeType::FUNCTION; - } - void visitImpl(QueryTreeNodePtr & node) { auto * function_node = node->as(); @@ -136,19 +107,18 @@ public: if (!function_node) return; - /// we cannot change the type of its result because it's used - /// as argument in another function - if (used_functions.contains(function_node)) - return; + /// to preserve return type (String) of the current function_node, we wrap the newly + /// generated function nodes into toString std::string_view function_name = function_node->getFunctionName(); if (function_name == "if") { - auto & argument_nodes = function_node->getArguments().getNodes(); - - if (argument_nodes.size() != 3) + if (function_node->getArguments().getNodes().size() != 3) return; + auto modified_if_node = function_node->clone(); + auto & argument_nodes = modified_if_node->as()->getArguments().getNodes(); + const auto * first_literal = argument_nodes[1]->as(); const auto * second_literal = argument_nodes[2]->as(); @@ -163,16 +133,18 @@ public: string_values.insert(second_literal->getValue().get()); 
changeIfArguments(argument_nodes[1], argument_nodes[2], string_values, context); + wrapIntoToString(*function_node, std::move(modified_if_node), context); return; } if (function_name == "transform") { - auto & argument_nodes = function_node->getArguments().getNodes(); - - if (argument_nodes.size() != 4) + if (function_node->getArguments().getNodes().size() != 4) return; + auto modified_transform_node = function_node->clone(); + auto & argument_nodes = modified_transform_node->as()->getArguments().getNodes(); + const auto * literal_to = argument_nodes[2]->as(); const auto * literal_default = argument_nodes[3]->as(); @@ -202,12 +174,12 @@ public: string_values.insert(literal_default->getValue().get()); changeTransformArguments(argument_nodes[2], argument_nodes[3], string_values, context); + wrapIntoToString(*function_node, std::move(modified_transform_node), context); return; } } private: - std::unordered_set used_functions; ContextPtr context; }; @@ -215,18 +187,8 @@ private: void IfTransformStringsToEnumPass::run(QueryTreeNodePtr query, ContextPtr context) { - std::unordered_set used_functions; - std::unordered_set function_names{"if", "transform"}; - - { - FindUsedFunctionsVisitor visitor(used_functions, function_names, 0); - visitor.visit(query); - } - - { - ConvertStringsToEnumVisitor visitor(std::move(used_functions), context); - visitor.visit(query); - } + ConvertStringsToEnumVisitor visitor(context); + visitor.visit(query); } } diff --git a/tests/queries/0_stateless/02497_if_transform_strings_to_enum.reference b/tests/queries/0_stateless/02497_if_transform_strings_to_enum.reference index ed514d478f5..ac322bd1fe9 100644 --- a/tests/queries/0_stateless/02497_if_transform_strings_to_enum.reference +++ b/tests/queries/0_stateless/02497_if_transform_strings_to_enum.reference @@ -16,25 +16,28 @@ QUERY id: 0 transform(number, [2, 4, 6], [\'google\', \'censor.net\', \'yahoo\'], \'other\') String PROJECTION LIST id: 1, nodes: 1 - FUNCTION id: 2, function_name: transform, function_type: ordinary, result_type: String + FUNCTION id: 2, function_name: toString, function_type: ordinary, result_type: String ARGUMENTS - LIST id: 3, nodes: 4 - COLUMN id: 4, column_name: number, result_type: UInt64, source_id: 5 - CONSTANT id: 6, constant_value: Array_[UInt64_2, UInt64_4, UInt64_6], constant_value_type: Array(UInt8) - FUNCTION id: 7, function_name: _CAST, function_type: ordinary, result_type: Array(Enum8(\'censor.net\' = 1, \'google\' = 2, \'other\' = 3, \'yahoo\' = 4)) + LIST id: 3, nodes: 1 + FUNCTION id: 4, function_name: transform, function_type: ordinary, result_type: String ARGUMENTS - LIST id: 8, nodes: 2 - CONSTANT id: 9, constant_value: Array_[\'google\', \'censor.net\', \'yahoo\'], constant_value_type: Array(String) - CONSTANT id: 10, constant_value: \'Array(Enum8(\\\'censor.net\\\' = 1, \\\'google\\\' = 2, \\\'other\\\' = 3, \\\'yahoo\\\' = 4))\', constant_value_type: String - FUNCTION id: 11, function_name: _CAST, function_type: ordinary, result_type: Enum8(\'censor.net\' = 1, \'google\' = 2, \'other\' = 3, \'yahoo\' = 4) - ARGUMENTS - LIST id: 12, nodes: 2 - CONSTANT id: 13, constant_value: \'other\', constant_value_type: String - CONSTANT id: 14, constant_value: \'Enum8(\\\'censor.net\\\' = 1, \\\'google\\\' = 2, \\\'other\\\' = 3, \\\'yahoo\\\' = 4)\', constant_value_type: String + LIST id: 5, nodes: 4 + COLUMN id: 6, column_name: number, result_type: UInt64, source_id: 7 + CONSTANT id: 8, constant_value: Array_[UInt64_2, UInt64_4, UInt64_6], constant_value_type: Array(UInt8) + FUNCTION 
id: 9, function_name: _CAST, function_type: ordinary, result_type: Array(Enum8(\'censor.net\' = 1, \'google\' = 2, \'other\' = 3, \'yahoo\' = 4)) + ARGUMENTS + LIST id: 10, nodes: 2 + CONSTANT id: 11, constant_value: Array_[\'google\', \'censor.net\', \'yahoo\'], constant_value_type: Array(String) + CONSTANT id: 12, constant_value: \'Array(Enum8(\\\'censor.net\\\' = 1, \\\'google\\\' = 2, \\\'other\\\' = 3, \\\'yahoo\\\' = 4))\', constant_value_type: String + FUNCTION id: 13, function_name: _CAST, function_type: ordinary, result_type: Enum8(\'censor.net\' = 1, \'google\' = 2, \'other\' = 3, \'yahoo\' = 4) + ARGUMENTS + LIST id: 14, nodes: 2 + CONSTANT id: 15, constant_value: \'other\', constant_value_type: String + CONSTANT id: 16, constant_value: \'Enum8(\\\'censor.net\\\' = 1, \\\'google\\\' = 2, \\\'other\\\' = 3, \\\'yahoo\\\' = 4)\', constant_value_type: String JOIN TREE - TABLE id: 5, table_name: system.numbers + TABLE id: 7, table_name: system.numbers LIMIT - CONSTANT id: 15, constant_value: UInt64_10, constant_value_type: UInt8 + CONSTANT id: 17, constant_value: UInt64_10, constant_value_type: UInt8 google google google @@ -53,49 +56,9 @@ QUERY id: 0 if(greater(number, 5), \'censor.net\', \'google\') String PROJECTION LIST id: 1, nodes: 1 - FUNCTION id: 2, function_name: if, function_type: ordinary, result_type: String + FUNCTION id: 2, function_name: toString, function_type: ordinary, result_type: String ARGUMENTS - LIST id: 3, nodes: 3 - FUNCTION id: 4, function_name: greater, function_type: ordinary, result_type: UInt8 - ARGUMENTS - LIST id: 5, nodes: 2 - COLUMN id: 6, column_name: number, result_type: UInt64, source_id: 7 - CONSTANT id: 8, constant_value: UInt64_5, constant_value_type: UInt8 - FUNCTION id: 9, function_name: _CAST, function_type: ordinary, result_type: Enum8(\'censor.net\' = 1, \'google\' = 2) - ARGUMENTS - LIST id: 10, nodes: 2 - CONSTANT id: 11, constant_value: \'censor.net\', constant_value_type: String - CONSTANT id: 12, constant_value: \'Enum8(\\\'censor.net\\\' = 1, \\\'google\\\' = 2)\', constant_value_type: String - FUNCTION id: 13, function_name: _CAST, function_type: ordinary, result_type: Enum8(\'censor.net\' = 1, \'google\' = 2) - ARGUMENTS - LIST id: 14, nodes: 2 - CONSTANT id: 15, constant_value: \'google\', constant_value_type: String - CONSTANT id: 12, constant_value: \'Enum8(\\\'censor.net\\\' = 1, \\\'google\\\' = 2)\', constant_value_type: String - JOIN TREE - TABLE id: 7, table_name: system.numbers - LIMIT - CONSTANT id: 16, constant_value: UInt64_10, constant_value_type: UInt8 -21 -21 -21 -21 -21 -21 -20 -20 -20 -20 -SELECT CAST(if(number > 5, \'20\', \'21\'), \'Int8\') -FROM system.numbers -LIMIT 10 -QUERY id: 0 - PROJECTION COLUMNS - CAST(if(greater(number, 5), \'20\', \'21\'), \'Int8\') Int8 - PROJECTION - LIST id: 1, nodes: 1 - FUNCTION id: 2, function_name: CAST, function_type: ordinary, result_type: Int8 - ARGUMENTS - LIST id: 3, nodes: 2 + LIST id: 3, nodes: 1 FUNCTION id: 4, function_name: if, function_type: ordinary, result_type: String ARGUMENTS LIST id: 5, nodes: 3 @@ -104,13 +67,344 @@ QUERY id: 0 LIST id: 7, nodes: 2 COLUMN id: 8, column_name: number, result_type: UInt64, source_id: 9 CONSTANT id: 10, constant_value: UInt64_5, constant_value_type: UInt8 - CONSTANT id: 11, constant_value: \'20\', constant_value_type: String - CONSTANT id: 12, constant_value: \'21\', constant_value_type: String - CONSTANT id: 13, constant_value: \'Int8\', constant_value_type: String + FUNCTION id: 11, function_name: _CAST, function_type: ordinary, 
result_type: Enum8(\'censor.net\' = 1, \'google\' = 2) + ARGUMENTS + LIST id: 12, nodes: 2 + CONSTANT id: 13, constant_value: \'censor.net\', constant_value_type: String + CONSTANT id: 14, constant_value: \'Enum8(\\\'censor.net\\\' = 1, \\\'google\\\' = 2)\', constant_value_type: String + FUNCTION id: 15, function_name: _CAST, function_type: ordinary, result_type: Enum8(\'censor.net\' = 1, \'google\' = 2) + ARGUMENTS + LIST id: 16, nodes: 2 + CONSTANT id: 17, constant_value: \'google\', constant_value_type: String + CONSTANT id: 18, constant_value: \'Enum8(\\\'censor.net\\\' = 1, \\\'google\\\' = 2)\', constant_value_type: String JOIN TREE TABLE id: 9, table_name: system.numbers LIMIT - CONSTANT id: 14, constant_value: UInt64_10, constant_value_type: UInt8 + CONSTANT id: 19, constant_value: UInt64_10, constant_value_type: UInt8 +other1 +other1 +google1 +other1 +censor.net1 +other1 +yahoo1 +other1 +other1 +other1 +SELECT concat(transform(number, [2, 4, 6], [\'google\', \'censor.net\', \'yahoo\'], \'other\'), \'1\') +FROM system.numbers +LIMIT 10 +QUERY id: 0 + PROJECTION COLUMNS + CONCAT(transform(number, [2, 4, 6], [\'google\', \'censor.net\', \'yahoo\'], \'other\'), \'1\') String + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: concat, function_type: ordinary, result_type: String + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: toString, function_type: ordinary, result_type: String + ARGUMENTS + LIST id: 5, nodes: 1 + FUNCTION id: 6, function_name: transform, function_type: ordinary, result_type: String + ARGUMENTS + LIST id: 7, nodes: 4 + COLUMN id: 8, column_name: number, result_type: UInt64, source_id: 9 + CONSTANT id: 10, constant_value: Array_[UInt64_2, UInt64_4, UInt64_6], constant_value_type: Array(UInt8) + FUNCTION id: 11, function_name: _CAST, function_type: ordinary, result_type: Array(Enum8(\'censor.net\' = 1, \'google\' = 2, \'other\' = 3, \'yahoo\' = 4)) + ARGUMENTS + LIST id: 12, nodes: 2 + CONSTANT id: 13, constant_value: Array_[\'google\', \'censor.net\', \'yahoo\'], constant_value_type: Array(String) + CONSTANT id: 14, constant_value: \'Array(Enum8(\\\'censor.net\\\' = 1, \\\'google\\\' = 2, \\\'other\\\' = 3, \\\'yahoo\\\' = 4))\', constant_value_type: String + FUNCTION id: 15, function_name: _CAST, function_type: ordinary, result_type: Enum8(\'censor.net\' = 1, \'google\' = 2, \'other\' = 3, \'yahoo\' = 4) + ARGUMENTS + LIST id: 16, nodes: 2 + CONSTANT id: 17, constant_value: \'other\', constant_value_type: String + CONSTANT id: 18, constant_value: \'Enum8(\\\'censor.net\\\' = 1, \\\'google\\\' = 2, \\\'other\\\' = 3, \\\'yahoo\\\' = 4)\', constant_value_type: String + CONSTANT id: 19, constant_value: \'1\', constant_value_type: String + JOIN TREE + TABLE id: 9, table_name: system.numbers + LIMIT + CONSTANT id: 20, constant_value: UInt64_10, constant_value_type: UInt8 +google1 +google1 +google1 +google1 +google1 +google1 +censor.net1 +censor.net1 +censor.net1 +censor.net1 +SELECT concat(if(number > 5, \'censor.net\', \'google\'), \'1\') +FROM system.numbers +LIMIT 10 +QUERY id: 0 + PROJECTION COLUMNS + CONCAT(if(greater(number, 5), \'censor.net\', \'google\'), \'1\') String + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: concat, function_type: ordinary, result_type: String + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: toString, function_type: ordinary, result_type: String + ARGUMENTS + LIST id: 5, nodes: 1 + FUNCTION id: 6, function_name: if, function_type: ordinary, result_type: String + 
ARGUMENTS + LIST id: 7, nodes: 3 + FUNCTION id: 8, function_name: greater, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 9, nodes: 2 + COLUMN id: 10, column_name: number, result_type: UInt64, source_id: 11 + CONSTANT id: 12, constant_value: UInt64_5, constant_value_type: UInt8 + FUNCTION id: 13, function_name: _CAST, function_type: ordinary, result_type: Enum8(\'censor.net\' = 1, \'google\' = 2) + ARGUMENTS + LIST id: 14, nodes: 2 + CONSTANT id: 15, constant_value: \'censor.net\', constant_value_type: String + CONSTANT id: 16, constant_value: \'Enum8(\\\'censor.net\\\' = 1, \\\'google\\\' = 2)\', constant_value_type: String + FUNCTION id: 17, function_name: _CAST, function_type: ordinary, result_type: Enum8(\'censor.net\' = 1, \'google\' = 2) + ARGUMENTS + LIST id: 18, nodes: 2 + CONSTANT id: 19, constant_value: \'google\', constant_value_type: String + CONSTANT id: 20, constant_value: \'Enum8(\\\'censor.net\\\' = 1, \\\'google\\\' = 2)\', constant_value_type: String + CONSTANT id: 21, constant_value: \'1\', constant_value_type: String + JOIN TREE + TABLE id: 11, table_name: system.numbers + LIMIT + CONSTANT id: 22, constant_value: UInt64_10, constant_value_type: UInt8 +google +google +google +google +google +google +censor.net +censor.net +censor.net +censor.net +SELECT value +FROM +( + SELECT if(number > 5, _CAST(\'censor.net\', \'Enum8(\\\'censor.net\\\' = 1, \\\'google\\\' = 2)\'), _CAST(\'google\', \'Enum8(\\\'censor.net\\\' = 1, \\\'google\\\' = 2)\')) AS value + FROM system.numbers + LIMIT 10 +) AS t1 +QUERY id: 0 + PROJECTION COLUMNS + value String + PROJECTION + LIST id: 1, nodes: 1 + COLUMN id: 2, column_name: value, result_type: String, source_id: 3 + JOIN TREE + QUERY id: 3, alias: t1, is_subquery: 1 + PROJECTION COLUMNS + value String + PROJECTION + LIST id: 4, nodes: 1 + FUNCTION id: 5, function_name: toString, function_type: ordinary, result_type: String + ARGUMENTS + LIST id: 6, nodes: 1 + FUNCTION id: 7, function_name: if, function_type: ordinary, result_type: String + ARGUMENTS + LIST id: 8, nodes: 3 + FUNCTION id: 9, function_name: greater, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 10, nodes: 2 + COLUMN id: 11, column_name: number, result_type: UInt64, source_id: 12 + CONSTANT id: 13, constant_value: UInt64_5, constant_value_type: UInt8 + FUNCTION id: 14, function_name: _CAST, function_type: ordinary, result_type: Enum8(\'censor.net\' = 1, \'google\' = 2) + ARGUMENTS + LIST id: 15, nodes: 2 + CONSTANT id: 16, constant_value: \'censor.net\', constant_value_type: String + CONSTANT id: 17, constant_value: \'Enum8(\\\'censor.net\\\' = 1, \\\'google\\\' = 2)\', constant_value_type: String + FUNCTION id: 18, function_name: _CAST, function_type: ordinary, result_type: Enum8(\'censor.net\' = 1, \'google\' = 2) + ARGUMENTS + LIST id: 19, nodes: 2 + CONSTANT id: 20, constant_value: \'google\', constant_value_type: String + CONSTANT id: 21, constant_value: \'Enum8(\\\'censor.net\\\' = 1, \\\'google\\\' = 2)\', constant_value_type: String + JOIN TREE + TABLE id: 12, table_name: system.numbers + LIMIT + CONSTANT id: 22, constant_value: UInt64_10, constant_value_type: UInt8 +other +other +google +other +censor.net +other +yahoo +other +other +other +SELECT value +FROM +( + SELECT transform(number, [2, 4, 6], _CAST([\'google\', \'censor.net\', \'yahoo\'], \'Array(Enum8(\\\'censor.net\\\' = 1, \\\'google\\\' = 2, \\\'other\\\' = 3, \\\'yahoo\\\' = 4))\'), _CAST(\'other\', \'Enum8(\\\'censor.net\\\' = 1, \\\'google\\\' = 2, \\\'other\\\' = 3, 
\\\'yahoo\\\' = 4)\')) AS value + FROM system.numbers + LIMIT 10 +) AS t1 +QUERY id: 0 + PROJECTION COLUMNS + value String + PROJECTION + LIST id: 1, nodes: 1 + COLUMN id: 2, column_name: value, result_type: String, source_id: 3 + JOIN TREE + QUERY id: 3, alias: t1, is_subquery: 1 + PROJECTION COLUMNS + value String + PROJECTION + LIST id: 4, nodes: 1 + FUNCTION id: 5, function_name: toString, function_type: ordinary, result_type: String + ARGUMENTS + LIST id: 6, nodes: 1 + FUNCTION id: 7, function_name: transform, function_type: ordinary, result_type: String + ARGUMENTS + LIST id: 8, nodes: 4 + COLUMN id: 9, column_name: number, result_type: UInt64, source_id: 10 + CONSTANT id: 11, constant_value: Array_[UInt64_2, UInt64_4, UInt64_6], constant_value_type: Array(UInt8) + FUNCTION id: 12, function_name: _CAST, function_type: ordinary, result_type: Array(Enum8(\'censor.net\' = 1, \'google\' = 2, \'other\' = 3, \'yahoo\' = 4)) + ARGUMENTS + LIST id: 13, nodes: 2 + CONSTANT id: 14, constant_value: Array_[\'google\', \'censor.net\', \'yahoo\'], constant_value_type: Array(String) + CONSTANT id: 15, constant_value: \'Array(Enum8(\\\'censor.net\\\' = 1, \\\'google\\\' = 2, \\\'other\\\' = 3, \\\'yahoo\\\' = 4))\', constant_value_type: String + FUNCTION id: 16, function_name: _CAST, function_type: ordinary, result_type: Enum8(\'censor.net\' = 1, \'google\' = 2, \'other\' = 3, \'yahoo\' = 4) + ARGUMENTS + LIST id: 17, nodes: 2 + CONSTANT id: 18, constant_value: \'other\', constant_value_type: String + CONSTANT id: 19, constant_value: \'Enum8(\\\'censor.net\\\' = 1, \\\'google\\\' = 2, \\\'other\\\' = 3, \\\'yahoo\\\' = 4)\', constant_value_type: String + JOIN TREE + TABLE id: 10, table_name: system.numbers + LIMIT + CONSTANT id: 20, constant_value: UInt64_10, constant_value_type: UInt8 +google google +google google +google google +google google +google google +google google +censor.net censor.net +censor.net censor.net +censor.net censor.net +censor.net censor.net +SELECT + if(number > 5, _CAST(\'censor.net\', \'Enum8(\\\'censor.net\\\' = 1, \\\'google\\\' = 2)\'), _CAST(\'google\', \'Enum8(\\\'censor.net\\\' = 1, \\\'google\\\' = 2)\')) AS value, + value +FROM system.numbers +LIMIT 10 +QUERY id: 0 + PROJECTION COLUMNS + value String + value String + PROJECTION + LIST id: 1, nodes: 2 + FUNCTION id: 2, function_name: toString, function_type: ordinary, result_type: String + ARGUMENTS + LIST id: 3, nodes: 1 + FUNCTION id: 4, function_name: if, function_type: ordinary, result_type: String + ARGUMENTS + LIST id: 5, nodes: 3 + FUNCTION id: 6, function_name: greater, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 7, nodes: 2 + COLUMN id: 8, column_name: number, result_type: UInt64, source_id: 9 + CONSTANT id: 10, constant_value: UInt64_5, constant_value_type: UInt8 + FUNCTION id: 11, function_name: _CAST, function_type: ordinary, result_type: Enum8(\'censor.net\' = 1, \'google\' = 2) + ARGUMENTS + LIST id: 12, nodes: 2 + CONSTANT id: 13, constant_value: \'censor.net\', constant_value_type: String + CONSTANT id: 14, constant_value: \'Enum8(\\\'censor.net\\\' = 1, \\\'google\\\' = 2)\', constant_value_type: String + FUNCTION id: 15, function_name: _CAST, function_type: ordinary, result_type: Enum8(\'censor.net\' = 1, \'google\' = 2) + ARGUMENTS + LIST id: 16, nodes: 2 + CONSTANT id: 17, constant_value: \'google\', constant_value_type: String + CONSTANT id: 18, constant_value: \'Enum8(\\\'censor.net\\\' = 1, \\\'google\\\' = 2)\', constant_value_type: String + FUNCTION id: 2, 
function_name: toString, function_type: ordinary, result_type: String + ARGUMENTS + LIST id: 3, nodes: 1 + FUNCTION id: 4, function_name: if, function_type: ordinary, result_type: String + ARGUMENTS + LIST id: 5, nodes: 3 + FUNCTION id: 6, function_name: greater, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 7, nodes: 2 + COLUMN id: 8, column_name: number, result_type: UInt64, source_id: 9 + CONSTANT id: 10, constant_value: UInt64_5, constant_value_type: UInt8 + FUNCTION id: 11, function_name: _CAST, function_type: ordinary, result_type: Enum8(\'censor.net\' = 1, \'google\' = 2) + ARGUMENTS + LIST id: 12, nodes: 2 + CONSTANT id: 13, constant_value: \'censor.net\', constant_value_type: String + CONSTANT id: 14, constant_value: \'Enum8(\\\'censor.net\\\' = 1, \\\'google\\\' = 2)\', constant_value_type: String + FUNCTION id: 15, function_name: _CAST, function_type: ordinary, result_type: Enum8(\'censor.net\' = 1, \'google\' = 2) + ARGUMENTS + LIST id: 16, nodes: 2 + CONSTANT id: 17, constant_value: \'google\', constant_value_type: String + CONSTANT id: 18, constant_value: \'Enum8(\\\'censor.net\\\' = 1, \\\'google\\\' = 2)\', constant_value_type: String + JOIN TREE + TABLE id: 9, table_name: system.numbers + LIMIT + CONSTANT id: 19, constant_value: UInt64_10, constant_value_type: UInt8 +other other +other other +google google +other other +censor.net censor.net +other other +yahoo yahoo +other other +other other +other other +SELECT + transform(number, [2, 4, 6], _CAST([\'google\', \'censor.net\', \'yahoo\'], \'Array(Enum8(\\\'censor.net\\\' = 1, \\\'google\\\' = 2, \\\'other\\\' = 3, \\\'yahoo\\\' = 4))\'), _CAST(\'other\', \'Enum8(\\\'censor.net\\\' = 1, \\\'google\\\' = 2, \\\'other\\\' = 3, \\\'yahoo\\\' = 4)\')) AS value, + value +FROM system.numbers +LIMIT 10 +QUERY id: 0 + PROJECTION COLUMNS + value String + value String + PROJECTION + LIST id: 1, nodes: 2 + FUNCTION id: 2, function_name: toString, function_type: ordinary, result_type: String + ARGUMENTS + LIST id: 3, nodes: 1 + FUNCTION id: 4, function_name: transform, function_type: ordinary, result_type: String + ARGUMENTS + LIST id: 5, nodes: 4 + COLUMN id: 6, column_name: number, result_type: UInt64, source_id: 7 + CONSTANT id: 8, constant_value: Array_[UInt64_2, UInt64_4, UInt64_6], constant_value_type: Array(UInt8) + FUNCTION id: 9, function_name: _CAST, function_type: ordinary, result_type: Array(Enum8(\'censor.net\' = 1, \'google\' = 2, \'other\' = 3, \'yahoo\' = 4)) + ARGUMENTS + LIST id: 10, nodes: 2 + CONSTANT id: 11, constant_value: Array_[\'google\', \'censor.net\', \'yahoo\'], constant_value_type: Array(String) + CONSTANT id: 12, constant_value: \'Array(Enum8(\\\'censor.net\\\' = 1, \\\'google\\\' = 2, \\\'other\\\' = 3, \\\'yahoo\\\' = 4))\', constant_value_type: String + FUNCTION id: 13, function_name: _CAST, function_type: ordinary, result_type: Enum8(\'censor.net\' = 1, \'google\' = 2, \'other\' = 3, \'yahoo\' = 4) + ARGUMENTS + LIST id: 14, nodes: 2 + CONSTANT id: 15, constant_value: \'other\', constant_value_type: String + CONSTANT id: 16, constant_value: \'Enum8(\\\'censor.net\\\' = 1, \\\'google\\\' = 2, \\\'other\\\' = 3, \\\'yahoo\\\' = 4)\', constant_value_type: String + FUNCTION id: 2, function_name: toString, function_type: ordinary, result_type: String + ARGUMENTS + LIST id: 3, nodes: 1 + FUNCTION id: 4, function_name: transform, function_type: ordinary, result_type: String + ARGUMENTS + LIST id: 5, nodes: 4 + COLUMN id: 6, column_name: number, result_type: UInt64, source_id: 7 + 
CONSTANT id: 8, constant_value: Array_[UInt64_2, UInt64_4, UInt64_6], constant_value_type: Array(UInt8) + FUNCTION id: 9, function_name: _CAST, function_type: ordinary, result_type: Array(Enum8(\'censor.net\' = 1, \'google\' = 2, \'other\' = 3, \'yahoo\' = 4)) + ARGUMENTS + LIST id: 10, nodes: 2 + CONSTANT id: 11, constant_value: Array_[\'google\', \'censor.net\', \'yahoo\'], constant_value_type: Array(String) + CONSTANT id: 12, constant_value: \'Array(Enum8(\\\'censor.net\\\' = 1, \\\'google\\\' = 2, \\\'other\\\' = 3, \\\'yahoo\\\' = 4))\', constant_value_type: String + FUNCTION id: 13, function_name: _CAST, function_type: ordinary, result_type: Enum8(\'censor.net\' = 1, \'google\' = 2, \'other\' = 3, \'yahoo\' = 4) + ARGUMENTS + LIST id: 14, nodes: 2 + CONSTANT id: 15, constant_value: \'other\', constant_value_type: String + CONSTANT id: 16, constant_value: \'Enum8(\\\'censor.net\\\' = 1, \\\'google\\\' = 2, \\\'other\\\' = 3, \\\'yahoo\\\' = 4)\', constant_value_type: String + JOIN TREE + TABLE id: 7, table_name: system.numbers + LIMIT + CONSTANT id: 17, constant_value: UInt64_10, constant_value_type: UInt8 other other google diff --git a/tests/queries/0_stateless/02497_if_transform_strings_to_enum.sql b/tests/queries/0_stateless/02497_if_transform_strings_to_enum.sql index 1a10a578146..02467325c3f 100644 --- a/tests/queries/0_stateless/02497_if_transform_strings_to_enum.sql +++ b/tests/queries/0_stateless/02497_if_transform_strings_to_enum.sql @@ -9,9 +9,29 @@ SELECT number > 5 ? 'censor.net' : 'google' FROM system.numbers LIMIT 10; EXPLAIN SYNTAX SELECT number > 5 ? 'censor.net' : 'google' FROM system.numbers LIMIT 10; EXPLAIN QUERY TREE run_passes = 1 SELECT number > 5 ? 'censor.net' : 'google' FROM system.numbers LIMIT 10; -SELECT CAST(number > 5 ? '20' : '21', 'Int8') FROM system.numbers LIMIT 10; -EXPLAIN SYNTAX SELECT CAST(number > 5 ? '20' : '21', 'Int8') FROM system.numbers LIMIT 10; -EXPLAIN QUERY TREE run_passes = 1 SELECT CAST(number > 5 ? '20' : '21', 'Int8') FROM system.numbers LIMIT 10; +SELECT CONCAT(transform(number, [2, 4, 6], ['google', 'censor.net', 'yahoo'], 'other'), '1') FROM system.numbers LIMIT 10; +EXPLAIN SYNTAX SELECT CONCAT(transform(number, [2, 4, 6], ['google', 'censor.net', 'yahoo'], 'other'), '1') FROM system.numbers LIMIT 10; +EXPLAIN QUERY TREE run_passes = 1 SELECT CONCAT(transform(number, [2, 4, 6], ['google', 'censor.net', 'yahoo'], 'other'), '1') FROM system.numbers LIMIT 10; + +SELECT CONCAT(number > 5 ? 'censor.net' : 'google', '1') FROM system.numbers LIMIT 10; +EXPLAIN SYNTAX SELECT CONCAT(number > 5 ? 'censor.net' : 'google', '1') FROM system.numbers LIMIT 10; +EXPLAIN QUERY TREE run_passes = 1 SELECT CONCAT(number > 5 ? 'censor.net' : 'google', '1') FROM system.numbers LIMIT 10; + +SELECT t1.value FROM (SELECT number > 5 ? 'censor.net' : 'google' as value FROM system.numbers LIMIT 10) as t1; +EXPLAIN SYNTAX SELECT t1.value FROM (SELECT number > 5 ? 'censor.net' : 'google' as value FROM system.numbers LIMIT 10) as t1; +EXPLAIN QUERY TREE run_passes = 1 SELECT t1.value FROM (SELECT number > 5 ? 
'censor.net' : 'google' as value FROM system.numbers LIMIT 10) as t1;
+
+SELECT t1.value FROM (SELECT transform(number, [2, 4, 6], ['google', 'censor.net', 'yahoo'], 'other') as value FROM system.numbers LIMIT 10) as t1;
+EXPLAIN SYNTAX SELECT t1.value FROM (SELECT transform(number, [2, 4, 6], ['google', 'censor.net', 'yahoo'], 'other') as value FROM system.numbers LIMIT 10) as t1;
+EXPLAIN QUERY TREE run_passes = 1 SELECT t1.value FROM (SELECT transform(number, [2, 4, 6], ['google', 'censor.net', 'yahoo'], 'other') as value FROM system.numbers LIMIT 10) as t1;
+
+SELECT number > 5 ? 'censor.net' : 'google' as value, value FROM system.numbers LIMIT 10;
+EXPLAIN SYNTAX SELECT number > 5 ? 'censor.net' : 'google' as value, value FROM system.numbers LIMIT 10;
+EXPLAIN QUERY TREE run_passes = 1 SELECT number > 5 ? 'censor.net' : 'google' as value, value FROM system.numbers LIMIT 10;
+
+SELECT transform(number, [2, 4, 6], ['google', 'censor.net', 'yahoo'], 'other') as value, value FROM system.numbers LIMIT 10;
+EXPLAIN SYNTAX SELECT transform(number, [2, 4, 6], ['google', 'censor.net', 'yahoo'], 'other') as value, value FROM system.numbers LIMIT 10;
+EXPLAIN QUERY TREE run_passes = 1 SELECT transform(number, [2, 4, 6], ['google', 'censor.net', 'yahoo'], 'other') as value, value FROM system.numbers LIMIT 10;
 
 SET optimize_if_transform_strings_to_enum = 0;
 

From 048aecf54008bed51fd2abe24c387e2306235db4 Mon Sep 17 00:00:00 2001
From: Antonio Andelic
Date: Fri, 9 Dec 2022 09:11:10 +0000
Subject: [PATCH 09/35] Fix tests

---
 .../Passes/IfTransformStringsToEnumPass.cpp   |  9 ++-
 ...497_if_transform_strings_to_enum.reference | 69 +++++++++++++++++++
 .../02497_if_transform_strings_to_enum.sql    |  8 +++
 3 files changed, 83 insertions(+), 3 deletions(-)

diff --git a/src/Analyzer/Passes/IfTransformStringsToEnumPass.cpp b/src/Analyzer/Passes/IfTransformStringsToEnumPass.cpp
index 18518bb5f8b..65120632c0c 100644
--- a/src/Analyzer/Passes/IfTransformStringsToEnumPass.cpp
+++ b/src/Analyzer/Passes/IfTransformStringsToEnumPass.cpp
@@ -83,7 +83,7 @@ void changeTransformArguments(
 
 void wrapIntoToString(FunctionNode & function_node, QueryTreeNodePtr arg, ContextPtr context)
 {
-    assert(WhichDataType(function_node.getResultType()).isString());
+    assert(isString(function_node.getResultType()));
 
     auto to_string_function = FunctionFactory::instance().get("toString", std::move(context));
     QueryTreeNodes arguments{std::move(arg)};
@@ -125,7 +125,7 @@ public:
             if (!first_literal || !second_literal)
                 return;
 
-            if (!WhichDataType(first_literal->getResultType()).isString() || !WhichDataType(second_literal->getResultType()).isString())
+            if (!isString(first_literal->getResultType()) || !isString(second_literal->getResultType()))
                 return;
 
             std::set<std::string> string_values;
@@ -145,13 +145,16 @@ public:
             auto modified_transform_node = function_node->clone();
             auto & argument_nodes = modified_transform_node->as<FunctionNode>()->getArguments().getNodes();
 
+            if (!isString(function_node->getResultType()))
+                return;
+
             const auto * literal_to = argument_nodes[2]->as<ConstantNode>();
             const auto * literal_default = argument_nodes[3]->as<ConstantNode>();
 
             if (!literal_to || !literal_default)
                 return;
 
-            if (!WhichDataType(literal_to->getResultType()).isArray() || !WhichDataType(literal_default->getResultType()).isString())
+            if (!isArray(literal_to->getResultType()) || !isString(literal_default->getResultType()))
                 return;
 
             auto array_to = literal_to->getValue().get<Array>();
diff --git a/tests/queries/0_stateless/02497_if_transform_strings_to_enum.reference
b/tests/queries/0_stateless/02497_if_transform_strings_to_enum.reference index ac322bd1fe9..06863f1858b 100644 --- a/tests/queries/0_stateless/02497_if_transform_strings_to_enum.reference +++ b/tests/queries/0_stateless/02497_if_transform_strings_to_enum.reference @@ -405,6 +405,75 @@ QUERY id: 0 TABLE id: 7, table_name: system.numbers LIMIT CONSTANT id: 17, constant_value: UInt64_10, constant_value_type: UInt8 +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +SELECT transform(number, [NULL], _CAST([\'google\', \'censor.net\', \'yahoo\'], \'Array(Enum8(\\\'censor.net\\\' = 1, \\\'google\\\' = 2, \\\'other\\\' = 3, \\\'yahoo\\\' = 4))\'), _CAST(\'other\', \'Enum8(\\\'censor.net\\\' = 1, \\\'google\\\' = 2, \\\'other\\\' = 3, \\\'yahoo\\\' = 4)\')) +FROM +( + SELECT NULL AS number + FROM system.numbers + LIMIT 10 +) +QUERY id: 0 + PROJECTION COLUMNS + transform(number, [NULL], [\'google\', \'censor.net\', \'yahoo\'], \'other\') Nullable(Nothing) + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: transform, function_type: ordinary, result_type: Nullable(Nothing) + ARGUMENTS + LIST id: 3, nodes: 4 + COLUMN id: 4, column_name: number, result_type: Nullable(Nothing), source_id: 5 + CONSTANT id: 6, constant_value: Array_[NULL], constant_value_type: Array(Nullable(Nothing)) + CONSTANT id: 7, constant_value: Array_[\'google\', \'censor.net\', \'yahoo\'], constant_value_type: Array(String) + CONSTANT id: 8, constant_value: \'other\', constant_value_type: String + JOIN TREE + QUERY id: 5, is_subquery: 1 + PROJECTION COLUMNS + number Nullable(Nothing) + PROJECTION + LIST id: 9, nodes: 1 + CONSTANT id: 10, constant_value: NULL, constant_value_type: Nullable(Nothing) + JOIN TREE + TABLE id: 11, table_name: system.numbers + LIMIT + CONSTANT id: 12, constant_value: UInt64_10, constant_value_type: UInt8 +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +SELECT transform(number, NULL, _CAST([\'google\', \'censor.net\', \'yahoo\'], \'Array(Enum8(\\\'censor.net\\\' = 1, \\\'google\\\' = 2, \\\'other\\\' = 3, \\\'yahoo\\\' = 4))\'), _CAST(\'other\', \'Enum8(\\\'censor.net\\\' = 1, \\\'google\\\' = 2, \\\'other\\\' = 3, \\\'yahoo\\\' = 4)\')) +FROM system.numbers +LIMIT 10 +QUERY id: 0 + PROJECTION COLUMNS + transform(number, NULL, [\'google\', \'censor.net\', \'yahoo\'], \'other\') Nullable(Nothing) + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: transform, function_type: ordinary, result_type: Nullable(Nothing) + ARGUMENTS + LIST id: 3, nodes: 4 + COLUMN id: 4, column_name: number, result_type: UInt64, source_id: 5 + CONSTANT id: 6, constant_value: NULL, constant_value_type: Nullable(Nothing) + CONSTANT id: 7, constant_value: Array_[\'google\', \'censor.net\', \'yahoo\'], constant_value_type: Array(String) + CONSTANT id: 8, constant_value: \'other\', constant_value_type: String + JOIN TREE + TABLE id: 5, table_name: system.numbers + LIMIT + CONSTANT id: 9, constant_value: UInt64_10, constant_value_type: UInt8 other other google diff --git a/tests/queries/0_stateless/02497_if_transform_strings_to_enum.sql b/tests/queries/0_stateless/02497_if_transform_strings_to_enum.sql index 02467325c3f..c23046c7b20 100644 --- a/tests/queries/0_stateless/02497_if_transform_strings_to_enum.sql +++ b/tests/queries/0_stateless/02497_if_transform_strings_to_enum.sql @@ -33,6 +33,14 @@ SELECT transform(number, [2, 4, 6], ['google', 'censor.net', 'yahoo'], 'other') EXPLAIN SYNTAX SELECT transform(number, [2, 4, 6], ['google', 'censor.net', 'yahoo'], 'other') as value, value FROM system.numbers LIMIT 10; EXPLAIN 
QUERY TREE run_passes = 1 SELECT transform(number, [2, 4, 6], ['google', 'censor.net', 'yahoo'], 'other') as value, value FROM system.numbers LIMIT 10; +SELECT transform(number, [NULL], ['google', 'censor.net', 'yahoo'], 'other') FROM (SELECT NULL as number FROM system.numbers LIMIT 10); +EXPLAIN SYNTAX SELECT transform(number, [NULL], ['google', 'censor.net', 'yahoo'], 'other') FROM (SELECT NULL as number FROM system.numbers LIMIT 10); +EXPLAIN QUERY TREE run_passes = 1 SELECT transform(number, [NULL], ['google', 'censor.net', 'yahoo'], 'other') FROM (SELECT NULL as number FROM system.numbers LIMIT 10); + +SELECT transform(number, NULL, ['google', 'censor.net', 'yahoo'], 'other') FROM system.numbers LIMIT 10; +EXPLAIN SYNTAX SELECT transform(number, NULL, ['google', 'censor.net', 'yahoo'], 'other') FROM system.numbers LIMIT 10; +EXPLAIN QUERY TREE run_passes = 1 SELECT transform(number, NULL, ['google', 'censor.net', 'yahoo'], 'other') FROM system.numbers LIMIT 10; + SET optimize_if_transform_strings_to_enum = 0; SELECT transform(number, [2, 4, 6], ['google', 'censor.net', 'yahoo'], 'other') FROM system.numbers LIMIT 10; From e18ac19ab303fa014d669ec1beea6d29dcd117ae Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Mon, 12 Dec 2022 11:55:33 +0000 Subject: [PATCH 10/35] Prevent dropping nested column if it can create empty part --- src/Storages/MergeTree/MergeTreeData.cpp | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 6b58e23b661..767ea348ed1 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -2600,7 +2600,18 @@ void MergeTreeData::checkAlterIsPossible(const AlterCommands & commands, Context } } - dropped_columns.emplace(command.column_name); + if (old_metadata.columns.has(command.column_name)) + { + dropped_columns.emplace(command.column_name); + } + else + { + const auto & nested = old_metadata.columns.getNested(command.column_name); + assert(!nested.empty()); + for (const auto & nested_column : nested) + dropped_columns.emplace(nested_column.name); + } + } else if (command.type == AlterCommand::RESET_SETTING) { From 965d08575e8a904c4a22aeb57f2d62aff6739d75 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Mon, 12 Dec 2022 12:08:42 +0000 Subject: [PATCH 11/35] Add test --- ...revent_drop_nested_if_empty_part.reference | 0 ...2500_prevent_drop_nested_if_empty_part.sql | 49 +++++++++++++++++++ 2 files changed, 49 insertions(+) create mode 100644 tests/queries/0_stateless/02500_prevent_drop_nested_if_empty_part.reference create mode 100644 tests/queries/0_stateless/02500_prevent_drop_nested_if_empty_part.sql diff --git a/tests/queries/0_stateless/02500_prevent_drop_nested_if_empty_part.reference b/tests/queries/0_stateless/02500_prevent_drop_nested_if_empty_part.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02500_prevent_drop_nested_if_empty_part.sql b/tests/queries/0_stateless/02500_prevent_drop_nested_if_empty_part.sql new file mode 100644 index 00000000000..529f574d32d --- /dev/null +++ b/tests/queries/0_stateless/02500_prevent_drop_nested_if_empty_part.sql @@ -0,0 +1,49 @@ +DROP TABLE IF EXISTS 02500_nested; + +SET flatten_nested = 1; + +CREATE TABLE 02500_nested(arr Array(Tuple(a Int32, b Int32))) Engine=MergeTree ORDER BY tuple(); +INSERT INTO 02500_nested(arr.a, arr.b) VALUES ([1], [2]); +ALTER TABLE 02500_nested ADD COLUMN z Int32; +ALTER TABLE 
02500_nested DROP COLUMN arr; -- { serverError BAD_ARGUMENTS } +DROP TABLE 02500_nested; + +CREATE TABLE 02500_nested(arr Array(Tuple(a Int32, b Int32)), z Int32) Engine=MergeTree ORDER BY tuple(); +INSERT INTO 02500_nested(arr.a, arr.b, z) VALUES ([1], [2], 2); +ALTER TABLE 02500_nested DROP COLUMN arr; +DROP TABLE 02500_nested; + +CREATE TABLE 02500_nested(nes Nested(a Int32, b Int32)) Engine=MergeTree ORDER BY tuple(); +INSERT INTO 02500_nested(nes.a, nes.b) VALUES ([1], [2]); +ALTER TABLE 02500_nested ADD COLUMN z Int32; +ALTER TABLE 02500_nested DROP COLUMN nes; -- { serverError BAD_ARGUMENTS } +DROP TABLE 02500_nested; + +CREATE TABLE 02500_nested(nes Array(Tuple(a Int32, b Int32)), z Int32) Engine=MergeTree ORDER BY tuple(); +INSERT INTO 02500_nested(nes.a, nes.b, z) VALUES ([1], [2], 2); +ALTER TABLE 02500_nested DROP COLUMN nes; +DROP TABLE 02500_nested; + +SET flatten_nested = 0; + +CREATE TABLE 02500_nested(arr Array(Tuple(a Int32, b Int32))) Engine=MergeTree ORDER BY tuple(); +INSERT INTO 02500_nested(arr) VALUES ([(1, 2)]); +ALTER TABLE 02500_nested ADD COLUMN z Int32; +ALTER TABLE 02500_nested DROP COLUMN arr; -- { serverError BAD_ARGUMENTS } +DROP TABLE 02500_nested; + +CREATE TABLE 02500_nested(arr Array(Tuple(a Int32, b Int32)), z Int32) Engine=MergeTree ORDER BY tuple(); +INSERT INTO 02500_nested(arr, z) VALUES ([(1, 2)], 2); +ALTER TABLE 02500_nested DROP COLUMN arr; +DROP TABLE 02500_nested; + +CREATE TABLE 02500_nested(nes Nested(a Int32, b Int32)) Engine=MergeTree ORDER BY tuple(); +INSERT INTO 02500_nested(nes) VALUES ([(1, 2)]); +ALTER TABLE 02500_nested ADD COLUMN z Int32; +ALTER TABLE 02500_nested DROP COLUMN nes; -- { serverError BAD_ARGUMENTS } +DROP TABLE 02500_nested; + +CREATE TABLE 02500_nested(nes Array(Tuple(a Int32, b Int32)), z Int32) Engine=MergeTree ORDER BY tuple(); +INSERT INTO 02500_nested(nes, z) VALUES ([(1, 2)], 2); +ALTER TABLE 02500_nested DROP COLUMN nes; +DROP TABLE 02500_nested; From a25c0d7544336fd962b64cffde7ca069e3f23b30 Mon Sep 17 00:00:00 2001 From: vdimir Date: Tue, 6 Dec 2022 11:45:41 +0000 Subject: [PATCH 12/35] Join engine works with analyzer --- src/Interpreters/HashJoin.cpp | 2 +- src/Interpreters/TableJoin.cpp | 5 + src/Interpreters/TableJoin.h | 1 + src/Planner/PlannerJoins.cpp | 17 +- src/Storages/StorageJoin.cpp | 9 +- .../02495_analyzer_storage_join.reference | 215 ++++++++++++++++++ .../02495_analyzer_storage_join.sql | 88 +++++++ .../02497_storage_join_right_assert.reference | 2 + .../02497_storage_join_right_assert.sql | 4 + ...8_storage_join_key_positions.reference.j2} | 11 + ...> 02498_storage_join_key_positions.sql.j2} | 47 ++-- 11 files changed, 382 insertions(+), 19 deletions(-) create mode 100644 tests/queries/0_stateless/02495_analyzer_storage_join.reference create mode 100644 tests/queries/0_stateless/02495_analyzer_storage_join.sql rename tests/queries/0_stateless/{02498_storage_join_key_positions.reference => 02498_storage_join_key_positions.reference.j2} (84%) rename tests/queries/0_stateless/{02498_storage_join_key_positions.sql => 02498_storage_join_key_positions.sql.j2} (56%) diff --git a/src/Interpreters/HashJoin.cpp b/src/Interpreters/HashJoin.cpp index f79ea950436..9fd577318f8 100644 --- a/src/Interpreters/HashJoin.cpp +++ b/src/Interpreters/HashJoin.cpp @@ -225,7 +225,7 @@ HashJoin::HashJoin(std::shared_ptr table_join_, const Block & right_s , right_sample_block(right_sample_block_) , log(&Poco::Logger::get("HashJoin")) { - LOG_DEBUG(log, "Datatype: {}, kind: {}, strictness: {}", data->type, kind, 
strictness);
+    LOG_DEBUG(log, "Datatype: {}, kind: {}, strictness: {}, right header: {}", data->type, kind, strictness, right_sample_block.dumpStructure());
     LOG_DEBUG(log, "Keys: {}", TableJoin::formatClauses(table_join->getClauses(), true));
 
     if (isCrossOrComma(kind))
diff --git a/src/Interpreters/TableJoin.cpp b/src/Interpreters/TableJoin.cpp
index 5d065e564b2..aa4f821657f 100644
--- a/src/Interpreters/TableJoin.cpp
+++ b/src/Interpreters/TableJoin.cpp
@@ -672,6 +672,11 @@ String TableJoin::renamedRightColumnName(const String & name) const
     return name;
 }
 
+void TableJoin::setRename(const String & from, const String & to)
+{
+    renames[from] = to;
+}
+
 void TableJoin::addKey(const String & left_name, const String & right_name, const ASTPtr & left_ast, const ASTPtr & right_ast)
 {
     clauses.back().key_names_left.emplace_back(left_name);
diff --git a/src/Interpreters/TableJoin.h b/src/Interpreters/TableJoin.h
index 874e68b0b97..9d03c9bd57b 100644
--- a/src/Interpreters/TableJoin.h
+++ b/src/Interpreters/TableJoin.h
@@ -334,6 +334,7 @@ public:
     Block getRequiredRightKeys(const Block & right_table_keys, std::vector<String> & keys_sources) const;
 
     String renamedRightColumnName(const String & name) const;
+    void setRename(const String & from, const String & to);
 
     void resetKeys();
     void resetToCross();
diff --git a/src/Planner/PlannerJoins.cpp b/src/Planner/PlannerJoins.cpp
index 019933f9b72..a17bbaebb04 100644
--- a/src/Planner/PlannerJoins.cpp
+++ b/src/Planner/PlannerJoins.cpp
@@ -45,8 +45,9 @@ namespace DB
 namespace ErrorCodes
 {
-    extern const int LOGICAL_ERROR;
+    extern const int INCOMPATIBLE_TYPE_OF_JOIN;
     extern const int INVALID_JOIN_ON_EXPRESSION;
+    extern const int LOGICAL_ERROR;
     extern const int NOT_IMPLEMENTED;
 }
 
@@ -671,9 +672,23 @@ std::shared_ptr<IJoin> chooseJoinAlgorithm(std::shared_ptr<TableJoin> & table_jo
 {
     trySetStorageInTableJoin(right_table_expression, table_join);
 
+    auto & right_table_expression_data = planner_context->getTableExpressionDataOrThrow(right_table_expression);
+
     /// JOIN with JOIN engine.
     if (auto storage = table_join->getStorageJoin())
+    {
+        for (const auto & result_column : right_table_expression_header)
+        {
+            const auto * source_column_name = right_table_expression_data.getColumnNameOrNull(result_column.name);
+            if (!source_column_name)
+                throw Exception(ErrorCodes::INCOMPATIBLE_TYPE_OF_JOIN,
+                    "JOIN with 'Join' table engine should be performed by storage keys [{}], but column '{}' was found",
+                    fmt::join(storage->getKeyNames(), ", "), result_column.name);
+
+            table_join->setRename(*source_column_name, result_column.name);
+        }
         return storage->getJoinLocked(table_join, planner_context->getQueryContext());
+    }
 
     /** JOIN with constant.
* Example: SELECT * FROM test_table AS t1 INNER JOIN test_table AS t2 ON 1; diff --git a/src/Storages/StorageJoin.cpp b/src/Storages/StorageJoin.cpp index aec28b37928..86d56e43b25 100644 --- a/src/Storages/StorageJoin.cpp +++ b/src/Storages/StorageJoin.cpp @@ -174,6 +174,9 @@ HashJoinPtr StorageJoin::getJoinLocked(std::shared_ptr analyzed_join, "Table {} needs the same join_use_nulls setting as present in LEFT or FULL JOIN", getStorageID().getNameForLogs()); + if (analyzed_join->getClauses().size() != 1) + throw Exception(ErrorCodes::INCOMPATIBLE_TYPE_OF_JOIN, "JOIN keys should match to the Join engine keys [{}]", fmt::join(getKeyNames(), ", ")); + const auto & join_on = analyzed_join->getOnlyClause(); if (join_on.on_filter_condition_left || join_on.on_filter_condition_right) throw Exception(ErrorCodes::INCOMPATIBLE_TYPE_OF_JOIN, "ON section of JOIN with filter conditions is not implemented"); @@ -211,9 +214,9 @@ HashJoinPtr StorageJoin::getJoinLocked(std::shared_ptr analyzed_join, left_key_names_resorted.push_back(key_names_left[key_position]); } - /// Set names qualifiers: table.column -> column - /// It's required because storage join stores non-qualified names - /// Qualifies will be added by join implementation (HashJoin) + /// Set qualified identifiers to original names (table.column -> column). + /// It's required because storage join stores non-qualified names. + /// Qualifies will be added by join implementation (TableJoin contains a rename mapping). analyzed_join->setRightKeys(key_names); analyzed_join->setLeftKeys(left_key_names_resorted); diff --git a/tests/queries/0_stateless/02495_analyzer_storage_join.reference b/tests/queries/0_stateless/02495_analyzer_storage_join.reference new file mode 100644 index 00000000000..509a79bc618 --- /dev/null +++ b/tests/queries/0_stateless/02495_analyzer_storage_join.reference @@ -0,0 +1,215 @@ +--- no name clashes --- +id2 id1 val key2 key1 a b x y +0 0 0 6 -6 60 600 6000 60000 +5 -5 55 5 -5 50 500 5000 50000 +4 -4 44 4 -4 40 400 4000 40000 +3 -3 33 3 -3 30 300 3000 30000 +2 -2 22 2 -2 20 200 2000 20000 +id1 val key1 b x +0 0 -6 600 6000 +-5 55 -5 500 5000 +-4 44 -4 400 4000 +-3 33 -3 300 3000 +-2 22 -2 200 2000 +id1 val key1 b x +0 0 -6 600 6000 +-5 55 -5 500 5000 +-4 44 -4 400 4000 +-3 33 -3 300 3000 +-2 22 -2 200 2000 +val b x +0 600 6000 +55 500 5000 +44 400 4000 +33 300 3000 +22 200 2000 +val +0 +55 +44 +33 +22 +x +6000 +5000 +4000 +3000 +2000 +--- name clashes --- +-- using -- +key1 key2 t.b t.x val a tj.b tj.x y +-6 6 0 0 0 60 600 6000 60000 +-5 5 55 555 5555 50 500 5000 50000 +-4 4 44 444 4444 40 400 4000 40000 +-3 3 33 333 2222 30 300 3000 30000 +-2 2 22 222 2222 20 200 2000 20000 +key1 +-6 +-5 +-4 +-3 +-2 +t.key1 tj.key1 +0 -6 +-5 -5 +-4 -4 +-3 -3 +-2 -2 +t.key2 tj.key2 +0 6 +5 5 +4 4 +3 3 +2 2 +t.b tj.b +0 600 +55 500 +44 400 +33 300 +22 200 +t.x tj.b +0 600 +555 500 +444 400 +333 300 +222 200 +a +60 +50 +40 +30 +20 +tj.b +600 +500 +400 +300 +200 +tj.x +6000 +5000 +4000 +3000 +2000 +y +60000 +50000 +40000 +30000 +20000 +a +60 +50 +40 +30 +20 +y +60000 +50000 +40000 +30000 +20000 +val +0 +5555 +4444 +2222 +2222 +val +0 +5555 +4444 +2222 +2222 +-- on -- +t.key2 t.key1 t.b t.x val tj.key2 tj.key1 a tj.b tj.x y +5 -5 55 555 5555 5 -5 50 500 5000 50000 +4 -4 44 444 4444 4 -4 40 400 4000 40000 +3 -3 33 333 2222 3 -3 30 300 3000 30000 +2 -2 22 222 2222 2 -2 20 200 2000 20000 +0 0 0 0 0 6 -6 60 600 6000 60000 +t.key1 tj.key1 +-5 -5 +-4 -4 +-3 -3 +-2 -2 +0 -6 +t.key2 tj.key2 +5 5 +4 4 +3 3 +2 2 +0 6 +t.b tj.b +55 500 +44 400 +33 300 +22 
200 +0 600 +t.x tj.b +555 500 +444 400 +333 300 +222 200 +0 600 +a +50 +40 +30 +20 +60 +tj.b +500 +400 +300 +200 +600 +tj.x +5000 +4000 +3000 +2000 +6000 +y +50000 +40000 +30000 +20000 +60000 +a +50 +40 +30 +20 +60 +y +50000 +40000 +30000 +20000 +60000 +val +5555 +4444 +2222 +2222 +0 +val +5555 +4444 +2222 +2222 +0 +--- unsupported and illegal conditions --- +t.key2 t.key1 t.b t.x val tj.key2 tj.key1 a tj.b tj.x y +5 -5 55 555 5555 4 -4 40 400 4000 40000 +4 -4 44 444 4444 3 -3 30 300 3000 30000 +3 -3 33 333 2222 2 -2 20 200 2000 20000 +0 0 0 0 0 5 -5 50 500 5000 50000 +0 0 0 0 0 6 -6 60 600 6000 60000 +t.key2 t.key1 t.b t.x val tj.key2 tj.key1 a tj.b tj.x y +2 -2 22 222 2222 2 -2 20 200 2000 20000 +0 0 0 0 0 3 -3 30 300 3000 30000 +0 0 0 0 0 4 -4 40 400 4000 40000 +0 0 0 0 0 5 -5 50 500 5000 50000 +0 0 0 0 0 6 -6 60 600 6000 60000 diff --git a/tests/queries/0_stateless/02495_analyzer_storage_join.sql b/tests/queries/0_stateless/02495_analyzer_storage_join.sql new file mode 100644 index 00000000000..6a4c1e45d69 --- /dev/null +++ b/tests/queries/0_stateless/02495_analyzer_storage_join.sql @@ -0,0 +1,88 @@ +DROP TABLE IF EXISTS t; +DROP TABLE IF EXISTS t1; +DROP TABLE IF EXISTS tj; + +SET allow_experimental_analyzer = 1; + +CREATE TABLE tj (key2 UInt64, key1 Int64, a UInt64, b UInt64, x UInt64, y UInt64) ENGINE = Join(ALL, RIGHT, key1, key2); +INSERT INTO tj VALUES (2, -2, 20, 200, 2000, 20000), (3, -3, 30, 300, 3000, 30000), (4, -4, 40, 400, 4000, 40000), (5, -5, 50, 500, 5000, 50000), (6, -6, 60, 600, 6000, 60000); + +SELECT '--- no name clashes ---'; + +CREATE TABLE t1 (id2 UInt64, id1 Int64, val UInt64) ENGINE = Memory; +INSERT INTO t1 VALUES (1, -1, 11), (2, -2, 22), (3, -3, 33), (4, -4, 44), (5, -5, 55); + +SELECT * FROM t1 ALL RIGHT JOIN tj ON t1.id1 == tj.key1 AND t1.id2 == tj.key2 ORDER BY key1 FORMAT TSVWithNames; +SELECT id1, val, key1, b, x FROM t1 ALL RIGHT JOIN tj ON t1.id1 == tj.key1 AND t1.id2 == tj.key2 ORDER BY key1 FORMAT TSVWithNames; +SELECT t1.id1, t1.val, tj.key1, tj.b, tj.x FROM t1 ALL RIGHT JOIN tj ON t1.id1 == tj.key1 AND t1.id2 == tj.key2 ORDER BY key1 FORMAT TSVWithNames; +SELECT val, b, x FROM t1 ALL RIGHT JOIN tj ON t1.id1 == tj.key1 AND t1.id2 == tj.key2 ORDER BY key1 FORMAT TSVWithNames; +SELECT val FROM t1 ALL RIGHT JOIN tj ON t1.id1 == tj.key1 AND t1.id2 == tj.key2 ORDER BY key1 FORMAT TSVWithNames; +SELECT x FROM t1 ALL RIGHT JOIN tj ON t1.id1 == tj.key1 AND t1.id2 == tj.key2 ORDER BY key1 FORMAT TSVWithNames; + +SELECT '--- name clashes ---'; + +CREATE TABLE t (key2 UInt64, key1 Int64, b UInt64, x UInt64, val UInt64) ENGINE = Memory; +INSERT INTO t VALUES (1, -1, 11, 111, 1111), (2, -2, 22, 222, 2222), (3, -3, 33, 333, 2222), (4, -4, 44, 444, 4444), (5, -5, 55, 555, 5555) + +SELECT '-- using --'; + +SELECT * FROM t ALL RIGHT JOIN tj USING (key1, key2) ORDER BY key1 FORMAT TSVWithNames; +SELECT key1 FROM t ALL RIGHT JOIN tj USING (key1, key2) ORDER BY key1 FORMAT TSVWithNames; +SELECT t.key1, tj.key1 FROM t ALL RIGHT JOIN tj USING (key1, key2) ORDER BY key1 FORMAT TSVWithNames; +SELECT t.key2, tj.key2 FROM t ALL RIGHT JOIN tj USING (key1, key2) ORDER BY key1 FORMAT TSVWithNames; +SELECT t.b, tj.b FROM t ALL RIGHT JOIN tj USING (key1, key2) ORDER BY key1 FORMAT TSVWithNames; +SELECT t.x, tj.b FROM t ALL RIGHT JOIN tj USING (key1, key2) ORDER BY key1 FORMAT TSVWithNames; +SELECT tj.a FROM t ALL RIGHT JOIN tj USING (key1, key2) ORDER BY key1 FORMAT TSVWithNames; +SELECT tj.b FROM t ALL RIGHT JOIN tj USING (key1, key2) ORDER BY key1 FORMAT TSVWithNames; +SELECT 
tj.x FROM t ALL RIGHT JOIN tj USING (key1, key2) ORDER BY key1 FORMAT TSVWithNames; +SELECT tj.y FROM t ALL RIGHT JOIN tj USING (key1, key2) ORDER BY key1 FORMAT TSVWithNames; +SELECT a FROM t ALL RIGHT JOIN tj USING (key1, key2) ORDER BY key1 FORMAT TSVWithNames; +SELECT b FROM t ALL RIGHT JOIN tj USING (key1, key2) ORDER BY key1 FORMAT TSVWithNames; -- { serverError AMBIGUOUS_IDENTIFIER } +SELECT x FROM t ALL RIGHT JOIN tj USING (key1, key2) ORDER BY key1 FORMAT TSVWithNames; -- { serverError AMBIGUOUS_IDENTIFIER } +SELECT y FROM t ALL RIGHT JOIN tj USING (key1, key2) ORDER BY key1 FORMAT TSVWithNames; +SELECT t.val FROM t ALL RIGHT JOIN tj USING (key1, key2) ORDER BY key1 FORMAT TSVWithNames; +SELECT val FROM t ALL RIGHT JOIN tj USING (key1, key2) ORDER BY key1 FORMAT TSVWithNames; + +SELECT '-- on --'; + +SELECT * FROM t ALL RIGHT JOIN tj ON t.key1 == tj.key1 AND t.key2 == tj.key2 ORDER BY t.key1 FORMAT TSVWithNames; +SELECT key1 FROM t ALL RIGHT JOIN tj ON t.key1 == tj.key1 AND t.key2 == tj.key2 ORDER BY t.key1 FORMAT TSVWithNames; -- { serverError AMBIGUOUS_IDENTIFIER } +SELECT t.key1, tj.key1 FROM t ALL RIGHT JOIN tj ON t.key1 == tj.key1 AND t.key2 == tj.key2 ORDER BY t.key1 FORMAT TSVWithNames; +SELECT t.key2, tj.key2 FROM t ALL RIGHT JOIN tj ON t.key1 == tj.key1 AND t.key2 == tj.key2 ORDER BY t.key1 FORMAT TSVWithNames; +SELECT t.b, tj.b FROM t ALL RIGHT JOIN tj ON t.key1 == tj.key1 AND t.key2 == tj.key2 ORDER BY t.key1 FORMAT TSVWithNames; +SELECT t.x, tj.b FROM t ALL RIGHT JOIN tj ON t.key1 == tj.key1 AND t.key2 == tj.key2 ORDER BY t.key1 FORMAT TSVWithNames; +SELECT tj.a FROM t ALL RIGHT JOIN tj ON t.key1 == tj.key1 AND t.key2 == tj.key2 ORDER BY t.key1 FORMAT TSVWithNames; +SELECT tj.b FROM t ALL RIGHT JOIN tj ON t.key1 == tj.key1 AND t.key2 == tj.key2 ORDER BY t.key1 FORMAT TSVWithNames; +SELECT tj.x FROM t ALL RIGHT JOIN tj ON t.key1 == tj.key1 AND t.key2 == tj.key2 ORDER BY t.key1 FORMAT TSVWithNames; +SELECT tj.y FROM t ALL RIGHT JOIN tj ON t.key1 == tj.key1 AND t.key2 == tj.key2 ORDER BY t.key1 FORMAT TSVWithNames; +SELECT a FROM t ALL RIGHT JOIN tj ON t.key1 == tj.key1 AND t.key2 == tj.key2 ORDER BY t.key1 FORMAT TSVWithNames; +SELECT b FROM t ALL RIGHT JOIN tj ON t.key1 == tj.key1 AND t.key2 == tj.key2 ORDER BY t.key1 FORMAT TSVWithNames; -- { serverError AMBIGUOUS_IDENTIFIER } +SELECT x FROM t ALL RIGHT JOIN tj ON t.key1 == tj.key1 AND t.key2 == tj.key2 ORDER BY t.key1 FORMAT TSVWithNames; -- { serverError AMBIGUOUS_IDENTIFIER } +SELECT y FROM t ALL RIGHT JOIN tj ON t.key1 == tj.key1 AND t.key2 == tj.key2 ORDER BY t.key1 FORMAT TSVWithNames; +SELECT t.val FROM t ALL RIGHT JOIN tj ON t.key1 == tj.key1 AND t.key2 == tj.key2 ORDER BY t.key1 FORMAT TSVWithNames; +SELECT val FROM t ALL RIGHT JOIN tj ON t.key1 == tj.key1 AND t.key2 == tj.key2 ORDER BY t.key1 FORMAT TSVWithNames; + +SELECT '--- unsupported and illegal conditions ---'; + +SELECT * FROM t ALL RIGHT JOIN tj ON t.key1 == tj.key1 AND t.key2 == tj.key2 + 1 FORMAT TSVWithNames; -- { serverError INCOMPATIBLE_TYPE_OF_JOIN } +SELECT * FROM t ALL RIGHT JOIN tj ON t.key1 + 1 == tj.key1 AND toUInt64(t.key2 - 1) == tj.key2 ORDER BY t.key1, tj.key2 FORMAT TSVWithNames; -- Ok: expression on the left table + +SELECT * FROM t ALL RIGHT JOIN tj ON t.key1 == tj.key1 AND t.key2 == tj.key2 AND 1 == 1 FORMAT TSVWithNames; -- { serverError INCOMPATIBLE_TYPE_OF_JOIN } +SELECT * FROM t ALL RIGHT JOIN tj ON t.key1 == tj.key1 AND t.key2 == tj.key2 AND 1 == 2 FORMAT TSVWithNames; -- { serverError INCOMPATIBLE_TYPE_OF_JOIN } + +SELECT * 
FROM t ALL RIGHT JOIN tj ON t.key1 == tj.key1 AND t.key2 == tj.key2 AND tj.a == 20 FORMAT TSVWithNames; -- { serverError INCOMPATIBLE_TYPE_OF_JOIN } +SELECT * FROM t ALL RIGHT JOIN tj ON t.key1 == tj.key1 AND t.key2 == tj.key2 AND t.b == 22 ORDER BY t.key1, tj.key2 FORMAT TSVWithNames; -- Ok: t.b from the left table + +SELECT * FROM t ALL RIGHT JOIN tj ON t.key1 == tj.key1 AND t.key2 == tj.key2 AND 1 != 1 FORMAT TSVWithNames; -- { serverError INCOMPATIBLE_TYPE_OF_JOIN } +SELECT * FROM t ALL RIGHT JOIN tj ON t.key1 == tj.key1 AND t.key2 == tj.key2 AND NULL FORMAT TSVWithNames; -- { serverError INCOMPATIBLE_TYPE_OF_JOIN } + +SELECT * FROM t ALL RIGHT JOIN tj ON t.key1 == tj.key1 AND t.key2 == tj.key2 AND 'aaa' FORMAT TSVWithNames; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT * FROM t ALL RIGHT JOIN tj ON 'aaa' FORMAT TSVWithNames; -- { serverError INVALID_JOIN_ON_EXPRESSION } + +SELECT * FROM t ALL RIGHT JOIN tj ON t.key1 == tj.key1 AND t.key2 == tj.key2 AND 1 FORMAT TSVWithNames; -- { serverError INCOMPATIBLE_TYPE_OF_JOIN } +SELECT * FROM t ALL RIGHT JOIN tj ON 0 FORMAT TSVWithNames; -- { serverError INCOMPATIBLE_TYPE_OF_JOIN } +SELECT * FROM t ALL RIGHT JOIN tj ON 1 FORMAT TSVWithNames; -- { serverError INCOMPATIBLE_TYPE_OF_JOIN } + +DROP TABLE IF EXISTS t; +DROP TABLE IF EXISTS t1; +DROP TABLE IF EXISTS tj; diff --git a/tests/queries/0_stateless/02497_storage_join_right_assert.reference b/tests/queries/0_stateless/02497_storage_join_right_assert.reference index b254a03f4ce..fe032687be7 100644 --- a/tests/queries/0_stateless/02497_storage_join_right_assert.reference +++ b/tests/queries/0_stateless/02497_storage_join_right_assert.reference @@ -1,2 +1,4 @@ 2 2 2 3 0 3 +2 2 2 +3 0 3 diff --git a/tests/queries/0_stateless/02497_storage_join_right_assert.sql b/tests/queries/0_stateless/02497_storage_join_right_assert.sql index ee9e8713d32..8f9134e9504 100644 --- a/tests/queries/0_stateless/02497_storage_join_right_assert.sql +++ b/tests/queries/0_stateless/02497_storage_join_right_assert.sql @@ -7,4 +7,8 @@ CREATE TABLE t2 (key UInt64, a UInt64) ENGINE = Join(ALL, RIGHT, key); INSERT INTO t1 VALUES (1, 1), (2, 2); INSERT INTO t2 VALUES (2, 2), (3, 3); +SET allow_experimental_analyzer = 0; +SELECT * FROM t1 ALL RIGHT JOIN t2 USING (key) ORDER BY key; + +SET allow_experimental_analyzer = 1; SELECT * FROM t1 ALL RIGHT JOIN t2 USING (key) ORDER BY key; diff --git a/tests/queries/0_stateless/02498_storage_join_key_positions.reference b/tests/queries/0_stateless/02498_storage_join_key_positions.reference.j2 similarity index 84% rename from tests/queries/0_stateless/02498_storage_join_key_positions.reference rename to tests/queries/0_stateless/02498_storage_join_key_positions.reference.j2 index a11b547ca38..83d2ab32886 100644 --- a/tests/queries/0_stateless/02498_storage_join_key_positions.reference +++ b/tests/queries/0_stateless/02498_storage_join_key_positions.reference.j2 @@ -1,3 +1,5 @@ +{% for _ in range(2) -%} +--- using --- 21 22 23 2000 31 32 33 3000 41 42 43 4000 @@ -14,6 +16,11 @@ 31 32 33 3000 41 42 43 4000 51 52 53 5000 +21 22 23 2000 +31 32 33 3000 +41 42 43 4000 +51 52 53 5000 +--- on --- 21 22 23 22 21 23 2000 31 32 33 32 31 33 3000 41 42 43 42 41 43 4000 @@ -30,6 +37,7 @@ 31 32 33 32 31 33 3000 41 42 43 42 41 43 4000 51 52 53 52 51 53 5000 +--- on different name --- 23 21 22 22 21 23 2000 33 31 32 32 31 33 3000 43 41 42 42 41 43 4000 @@ -42,6 +50,8 @@ 33 31 32 32 31 33 3000 43 41 42 42 41 43 4000 53 51 52 52 51 53 5000 +--- incompatible --- +--- reuse column from left --- 11 12 
13 11 11 11 1000 21 22 23 21 21 21 2000 31 32 33 31 31 31 3000 @@ -52,3 +62,4 @@ 31 32 33 31 31 31 3000 41 42 43 41 41 41 4000 51 52 53 51 51 51 5000 +{% endfor -%} diff --git a/tests/queries/0_stateless/02498_storage_join_key_positions.sql b/tests/queries/0_stateless/02498_storage_join_key_positions.sql.j2 similarity index 56% rename from tests/queries/0_stateless/02498_storage_join_key_positions.sql rename to tests/queries/0_stateless/02498_storage_join_key_positions.sql.j2 index 96687dab577..697f37fd535 100644 --- a/tests/queries/0_stateless/02498_storage_join_key_positions.sql +++ b/tests/queries/0_stateless/02498_storage_join_key_positions.sql.j2 @@ -8,20 +8,38 @@ INSERT INTO t1 VALUES (11, 12, 13), (21, 22, 23), (31, 32, 33), (41, 42, 43), (5 CREATE TABLE tj (key2 UInt64, key1 UInt64, key3 UInt64, attr UInt64) ENGINE = Join(ALL, INNER, key3, key2, key1); INSERT INTO tj VALUES (22, 21, 23, 2000), (32, 31, 33, 3000), (42, 41, 43, 4000), (52, 51, 53, 5000), (62, 61, 63, 6000); +CREATE TABLE tjj (key2 UInt64, key1 UInt64, key3 UInt64, attr UInt64) ENGINE = Join(ALL, INNER, key3, key2, key1); +INSERT INTO tjj VALUES (11, 11, 11, 1000), (21, 21, 21, 2000), (31, 31, 31, 3000), (41, 41, 41, 4000), (51, 51, 51, 5000), (61, 61, 61, 6000); + + +{% for use_analyzer in [0, 1] -%} +SET allow_experimental_analyzer = '{{ use_analyzer }}'; + +SELECT '--- using ---'; SELECT * FROM t1 ALL INNER JOIN tj USING (key1, key2, key3) ORDER BY key1; -SELECT * FROM t1 ALL INNER JOIN tj USING (key2, key3, key1) ORDER BY key1; -SELECT * FROM t1 ALL INNER JOIN tj USING (key3, key2, key1) ORDER BY key1; -SELECT * FROM t1 ALL INNER JOIN tj USING (key1, key3, key2) ORDER BY key1; +SELECT key1, key2, key3, attr FROM t1 ALL INNER JOIN tj USING (key1, key2, key3) ORDER BY key1; +SELECT key1, key2, key3, attr FROM t1 ALL INNER JOIN tj USING (key2, key3, key1) ORDER BY key1; +SELECT key1, key2, key3, attr FROM t1 ALL INNER JOIN tj USING (key3, key2, key1) ORDER BY key1; +SELECT key1, key2, key3, attr FROM t1 ALL INNER JOIN tj USING (key1, key3, key2) ORDER BY key1; -SELECT * FROM t1 ALL INNER JOIN tj ON t1.key3 = tj.key3 AND t1.key2 = tj.key2 AND t1.key1 = tj.key1 ORDER BY key1; -SELECT * FROM t1 ALL INNER JOIN tj ON t1.key2 = tj.key2 AND t1.key3 = tj.key3 AND t1.key1 = tj.key1 ORDER BY key1; -SELECT * FROM t1 ALL INNER JOIN tj ON t1.key3 = tj.key3 AND t1.key1 = tj.key1 AND t1.key2 = tj.key2 ORDER BY key1; -SELECT * FROM t1 ALL INNER JOIN tj ON t1.key1 = tj.key1 AND t1.key3 = tj.key3 AND t1.key2 = tj.key2 ORDER BY key1; +SELECT '--- on ---'; +SELECT * FROM t1 ALL INNER JOIN tj ON t1.key3 = tj.key3 AND t1.key2 = tj.key2 AND t1.key1 = tj.key1 ORDER BY t1.key1; +SELECT * FROM t1 ALL INNER JOIN tj ON t1.key2 = tj.key2 AND t1.key3 = tj.key3 AND t1.key1 = tj.key1 ORDER BY t1.key1; +SELECT * FROM t1 ALL INNER JOIN tj ON t1.key3 = tj.key3 AND t1.key1 = tj.key1 AND t1.key2 = tj.key2 ORDER BY t1.key1; +SELECT * FROM t1 ALL INNER JOIN tj ON t1.key1 = tj.key1 AND t1.key3 = tj.key3 AND t1.key2 = tj.key2 ORDER BY t1.key1; +SELECT '--- on different name ---'; SELECT * FROM (SELECT key3 AS c, key1 AS a, key2 AS b FROM t1) AS t1 ALL INNER JOIN tj ON t1.a = tj.key1 AND t1.c = tj.key3 AND t1.b = tj.key2 ORDER BY t1.a; SELECT * FROM (SELECT key3 AS c, key1 AS a, key2 AS b FROM t1) AS t1 ALL INNER JOIN tj ON t1.a = tj.key1 AND t1.b = tj.key2 AND t1.c = tj.key3 ORDER BY t1.a; SELECT * FROM (SELECT key3 AS c, key1 AS a, key2 AS b FROM t1) AS t1 ALL INNER JOIN tj ON t1.c = tj.key3 AND t1.a = tj.key1 AND t1.b = tj.key2 ORDER BY t1.a; +-- TODO 
(vdimir): uncomment after https://github.com/ClickHouse/ClickHouse/pull/44016 +-- SELECT * FROM t1 ALL INNER JOIN tj ON t1.key1 = tj.key1 AND t1.key3 = tj.key3 AND t1.key2 = tj.key2 AND 1; -- { serverError INCOMPATIBLE_TYPE_OF_JOIN } +-- SELECT * FROM t1 ALL INNER JOIN tj ON t1.key1 = tj.key1 AND t1.key3 = tj.key3 AND t1.key2 = tj.key2 AND 0; -- { serverError INCOMPATIBLE_TYPE_OF_JOIN } +-- SELECT * FROM t1 ALL INNER JOIN tj ON t1.key1 = tj.key1 AND t1.key3 = tj.key3 AND t1.key2 = tj.key2 AND 1 == 1; -- { serverError INCOMPATIBLE_TYPE_OF_JOIN } +-- SELECT * FROM t1 ALL INNER JOIN tj ON t1.key1 = tj.key1 AND t1.key3 = tj.key3 AND t1.key2 = tj.key2 AND 1 > 1; -- { serverError INCOMPATIBLE_TYPE_OF_JOIN } + +SELECT '--- incompatible ---'; SELECT * FROM t1 ALL INNER JOIN tj ON 1; -- { serverError INCOMPATIBLE_TYPE_OF_JOIN } SELECT * FROM t1 ALL INNER JOIN tj ON 0; -- { serverError INCOMPATIBLE_TYPE_OF_JOIN } SELECT * FROM t1 ALL INNER JOIN tj ON NULL; -- { serverError INCOMPATIBLE_TYPE_OF_JOIN } @@ -29,19 +47,20 @@ SELECT * FROM t1 ALL INNER JOIN tj ON 1 == 1; -- { serverError INCOMPATIBLE_TYPE SELECT * FROM t1 ALL INNER JOIN tj ON 1 != 1; -- { serverError INCOMPATIBLE_TYPE_OF_JOIN } SELECT * FROM t1 ALL INNER JOIN tj USING (key2, key3); -- { serverError INCOMPATIBLE_TYPE_OF_JOIN } -SELECT * FROM t1 ALL INNER JOIN tj USING (key1, key2, attr); -- { serverError INCOMPATIBLE_TYPE_OF_JOIN } -SELECT * FROM t1 ALL INNER JOIN tj USING (key1, key2, key3, attr); -- { serverError INCOMPATIBLE_TYPE_OF_JOIN } +SELECT * FROM t1 ALL INNER JOIN tj USING (key1, key2, attr) SETTINGS allow_experimental_analyzer = 0; -- { serverError INCOMPATIBLE_TYPE_OF_JOIN } +SELECT * FROM t1 ALL INNER JOIN tj USING (key1, key2, attr) SETTINGS allow_experimental_analyzer = 1; -- { serverError UNKNOWN_IDENTIFIER } +SELECT * FROM t1 ALL INNER JOIN tj USING (key1, key2, key3, attr) SETTINGS allow_experimental_analyzer = 0; -- { serverError INCOMPATIBLE_TYPE_OF_JOIN } +SELECT * FROM t1 ALL INNER JOIN tj USING (key1, key2, key3, attr) SETTINGS allow_experimental_analyzer = 1; -- { serverError UNKNOWN_IDENTIFIER } SELECT * FROM t1 ALL INNER JOIN tj ON t1.key1 = tj.attr; -- { serverError INCOMPATIBLE_TYPE_OF_JOIN } SELECT * FROM t1 ALL INNER JOIN tj ON t1.key1 = tj.key1; -- { serverError INCOMPATIBLE_TYPE_OF_JOIN } SELECT * FROM t1 ALL INNER JOIN tj ON t1.key1 = tj.key1 AND t1.key2 = tj.key2 AND t1.key3 = tj.attr; -- { serverError INCOMPATIBLE_TYPE_OF_JOIN } SELECT * FROM t1 ALL INNER JOIN tj ON t1.key1 = tj.key1 AND t1.key2 = tj.key2 AND t1.key3 = tj.key3 AND t1.key1 = tj.key1; -- { serverError INCOMPATIBLE_TYPE_OF_JOIN } +SELECT '--- reuse column from left ---'; +SELECT * FROM t1 ALL INNER JOIN tjj ON t1.key1 = tjj.key1 AND t1.key1 = tjj.key2 AND t1.key1 = tjj.key3 ORDER BY t1.key1; +SELECT * FROM t1 ALL INNER JOIN tjj ON t1.key1 = tjj.key1 AND t1.key1 = tjj.key3 AND t1.key1 = tjj.key2 ORDER BY t1.key1; -CREATE TABLE tjj (key2 UInt64, key1 UInt64, key3 UInt64, attr UInt64) ENGINE = Join(ALL, INNER, key3, key2, key1); -INSERT INTO tjj VALUES (11, 11, 11, 1000), (21, 21, 21, 2000), (31, 31, 31, 3000), (41, 41, 41, 4000), (51, 51, 51, 5000), (61, 61, 61, 6000); - -SELECT * FROM t1 ALL INNER JOIN tjj ON t1.key1 = tjj.key1 AND t1.key1 = tjj.key2 AND t1.key1 = tjj.key3 ORDER BY key1; -SELECT * FROM t1 ALL INNER JOIN tjj ON t1.key1 = tjj.key1 AND t1.key1 = tjj.key3 AND t1.key1 = tjj.key2 ORDER BY key1; +{% endfor -%} DROP TABLE IF EXISTS t1; DROP TABLE IF EXISTS tj; From 78c433b79d121d3d9cc80a15c21a15a2ef617c7d Mon Sep 17 00:00:00 2001 
From: Vitaly Baranov
Date: Sun, 4 Dec 2022 01:13:54 +0100
Subject: [PATCH 13/35] Improve TablesDependencyGraph.

---
 src/Databases/TablesDependencyGraph.cpp | 195 +++++++++++++++++-------
 src/Databases/TablesDependencyGraph.h   |  39 +++--
 2 files changed, 165 insertions(+), 69 deletions(-)

diff --git a/src/Databases/TablesDependencyGraph.cpp b/src/Databases/TablesDependencyGraph.cpp
index c4c361089ad..ee6ecb57eba 100644
--- a/src/Databases/TablesDependencyGraph.cpp
+++ b/src/Databases/TablesDependencyGraph.cpp
@@ -1,5 +1,6 @@
 #include
 #include
+#include
 #include
 
@@ -9,12 +10,13 @@ namespace DB
 namespace ErrorCodes
 {
     extern const int INFINITE_LOOP;
+    extern const int LOGICAL_ERROR;
 }
 
 namespace
 {
-    constexpr const size_t CYCLIC_LEVEL = static_cast<size_t>(-2);
+    constexpr const size_t CYCLIC_LEVEL = std::numeric_limits<size_t>::max();
 }
 
@@ -40,7 +42,7 @@ TablesDependencyGraph::TablesDependencyGraph(TablesDependencyGraph && src) noexc
 
 TablesDependencyGraph & TablesDependencyGraph::operator=(const TablesDependencyGraph & src)
 {
-    if (&src != this)
+    if (this != &src)
     {
         nodes = src.nodes;
         nodes_by_database_and_table_names = src.nodes_by_database_and_table_names;
@@ -54,11 +56,14 @@ TablesDependencyGraph & TablesDependencyGraph::operator=(const TablesDependencyG
 
 TablesDependencyGraph & TablesDependencyGraph::operator=(TablesDependencyGraph && src) noexcept
 {
-    nodes = std::exchange(src.nodes, decltype(nodes){});
-    nodes_by_database_and_table_names = std::exchange(src.nodes_by_database_and_table_names, decltype(nodes_by_database_and_table_names){});
-    nodes_by_uuid = std::exchange(src.nodes_by_uuid, decltype(nodes_by_uuid){});
-    levels_calculated = std::exchange(src.levels_calculated, false);
-    nodes_sorted_by_level_lazy = std::exchange(src.nodes_sorted_by_level_lazy, decltype(nodes_sorted_by_level_lazy){});
+    if (this != &src)
+    {
+        nodes = std::exchange(src.nodes, decltype(nodes){});
+        nodes_by_database_and_table_names = std::exchange(src.nodes_by_database_and_table_names, decltype(nodes_by_database_and_table_names){});
+        nodes_by_uuid = std::exchange(src.nodes_by_uuid, decltype(nodes_by_uuid){});
+        levels_calculated = std::exchange(src.levels_calculated, false);
+        nodes_sorted_by_level_lazy = std::exchange(src.nodes_sorted_by_level_lazy, decltype(nodes_sorted_by_level_lazy){});
+    }
     return *this;
 }
 
@@ -89,11 +94,13 @@ void TablesDependencyGraph::addDependency(const StorageID & table_id, const Stor
     auto * table_node = addOrUpdateNode(table_id);
     auto * dependency_node = addOrUpdateNode(dependency);
 
-    if (table_node->dependencies.contains(dependency_node))
-        return; /// Already have this dependency.
+    bool inserted = table_node->dependencies.insert(dependency_node).second;
+    if (!inserted)
+        return; /// Not inserted because we already had this dependency.
 
-    table_node->dependencies.insert(dependency_node);
-    dependency_node->dependents.insert(table_node);
+    /// `dependency_node` must be updated too.
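+    /// (Every edge is stored on both endpoints: in `dependencies` of the dependent node and in
+    /// `dependents` of the dependency node, so the two sets must stay symmetric on every insert and erase.)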
+ [[maybe_unused]] bool inserted_to_set = dependency_node->dependents.insert(table_node).second; + chassert(inserted_to_set); setNeedRecalculateLevels(); } @@ -126,13 +133,19 @@ void TablesDependencyGraph::addDependencies(const StorageID & table_id, const st for (auto * dependency_node : old_dependency_nodes) { if (!new_dependency_nodes.contains(dependency_node)) - dependency_node->dependents.erase(table_node); + { + [[maybe_unused]] bool removed_from_set = dependency_node->dependents.erase(table_node); + chassert(removed_from_set); + } } for (auto * dependency_node : new_dependency_nodes) { if (!old_dependency_nodes.contains(dependency_node)) - dependency_node->dependents.insert(table_node); + { + [[maybe_unused]] bool inserted_to_set = dependency_node->dependents.insert(table_node).second; + chassert(inserted_to_set); + } } table_node->dependencies = std::move(new_dependency_nodes); @@ -167,21 +180,28 @@ bool TablesDependencyGraph::removeDependency(const StorageID & table_id, const S auto dependency_it = table_node->dependencies.find(dependency_node); if (dependency_it == table_node->dependencies.end()) - return false; + return false; /// No such dependency, nothing to remove. table_node->dependencies.erase(dependency_it); - dependency_node->dependents.erase(table_node); bool table_node_removed = false; + /// `dependency_node` must be updated too. + [[maybe_unused]] bool removed_from_set = dependency_node->dependents.erase(table_node); + chassert(removed_from_set); + if (remove_isolated_tables && dependency_node->dependencies.empty() && dependency_node->dependents.empty()) { + /// The dependency table has no dependencies and no dependents now, so we will remove it from the graph. removeNode(dependency_node); if (table_node == dependency_node) table_node_removed = true; } if (remove_isolated_tables && !table_node_removed && table_node->dependencies.empty() && table_node->dependents.empty()) + { + /// The table `table_id` has no dependencies and no dependents now, so we will remove it from the graph. removeNode(table_node); + } setNeedRecalculateLevels(); return true; @@ -203,19 +223,28 @@ std::vector TablesDependencyGraph::removeDependencies(const StorageID for (auto * dependency_node : dependency_nodes) { + /// We're gathering the list of dependencies the table `table_id` had in the graph to return from the function. dependencies.emplace_back(dependency_node->storage_id); - dependency_node->dependents.erase(table_node); + + /// Update `dependency_node`. + [[maybe_unused]] bool removed_from_set = dependency_node->dependents.erase(table_node); + chassert(removed_from_set); if (remove_isolated_tables && dependency_node->dependencies.empty() && dependency_node->dependents.empty()) { + /// The dependency table has no dependencies and no dependents now, so we will remove it from the graph. removeNode(dependency_node); if (table_node == dependency_node) table_node_removed = true; } } - if (remove_isolated_tables && !table_node_removed && table_node->dependencies.empty() && table_node->dependents.empty()) + chassert(table_node->dependencies.empty()); + if (remove_isolated_tables && !table_node_removed && table_node->dependents.empty()) + { + /// The table `table_id` has no dependencies and no dependents now, so we will remove it from the graph. 
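+        /// (`table_node->dependencies` is already empty at this point, as verified by the chassert above,
+        /// so checking `dependents` alone is enough to conclude that the node is isolated.)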
removeNode(table_node); + } setNeedRecalculateLevels(); return dependencies; @@ -251,7 +280,12 @@ TablesDependencyGraph::Node * TablesDependencyGraph::findNode(const StorageID & { auto * node = it->second; if (table_id.hasUUID() && node->storage_id.hasUUID() && (table_id.uuid != node->storage_id.uuid)) - return nullptr; /// UUID is different, it's not the node we're looking for. + { + /// We found a table with specified database and table names in the graph, but surprisingly it has a different UUID. + /// Maybe an "EXCHANGE TABLES" command has been executed somehow without changing the graph? + LOG_WARNING(getLogger(), "Found table {} in the graph with unexpected UUID {}", table_id, node->storage_id.uuid); + return nullptr; /// Act like it's not found. + } return node; /// Found by table name. } } @@ -268,7 +302,8 @@ TablesDependencyGraph::Node * TablesDependencyGraph::addOrUpdateNode(const Stora if (table_id.hasUUID() && !node->storage_id.hasUUID()) { node->storage_id.uuid = table_id.uuid; - nodes_by_uuid.emplace(node->storage_id.uuid, node); + [[maybe_unused]] bool inserted_to_map = nodes_by_uuid.emplace(node->storage_id.uuid, node).second; + chassert(inserted_to_map); } if (!table_id.table_name.empty() && ((table_id.table_name != node->storage_id.table_name) || (table_id.database_name != node->storage_id.database_name))) @@ -283,7 +318,8 @@ TablesDependencyGraph::Node * TablesDependencyGraph::addOrUpdateNode(const Stora nodes_by_database_and_table_names.erase(node->storage_id); node->storage_id.database_name = table_id.database_name; node->storage_id.table_name = table_id.table_name; - nodes_by_database_and_table_names.emplace(node->storage_id, node); + [[maybe_unused]] bool inserted_to_map = nodes_by_database_and_table_names.emplace(node->storage_id, node).second; + chassert(inserted_to_map); } } else @@ -303,9 +339,15 @@ TablesDependencyGraph::Node * TablesDependencyGraph::addOrUpdateNode(const Stora nodes.insert(node_ptr); node = node_ptr.get(); if (table_id.hasUUID()) - nodes_by_uuid.emplace(table_id.uuid, node); + { + [[maybe_unused]] bool inserted_to_map = nodes_by_uuid.emplace(table_id.uuid, node).second; + chassert(inserted_to_map); + } if (!table_id.table_name.empty()) - nodes_by_database_and_table_names.emplace(table_id, node); + { + [[maybe_unused]] bool inserted_to_map = nodes_by_database_and_table_names.emplace(table_id, node).second; + chassert(inserted_to_map); + } } return node; } @@ -313,22 +355,39 @@ TablesDependencyGraph::Node * TablesDependencyGraph::addOrUpdateNode(const Stora void TablesDependencyGraph::removeNode(Node * node) { + chassert(node); auto dependency_nodes = std::move(node->dependencies); auto dependent_nodes = std::move(node->dependents); if (node->storage_id.hasUUID()) - nodes_by_uuid.erase(node->storage_id.uuid); + { + [[maybe_unused]] bool removed_from_map = nodes_by_uuid.erase(node->storage_id.uuid); + chassert(removed_from_map); + } if (!node->storage_id.table_name.empty()) - nodes_by_database_and_table_names.erase(node->storage_id); + { + [[maybe_unused]]bool removed_from_map = nodes_by_database_and_table_names.erase(node->storage_id); + chassert(removed_from_map); + } for (auto * dependency_node : dependency_nodes) - dependency_node->dependents.erase(node); + { + [[maybe_unused]] bool removed_from_set = dependency_node->dependents.erase(node); + chassert(removed_from_set); + } for (auto * dependent_node : dependent_nodes) - dependent_node->dependencies.erase(node); + { + [[maybe_unused]] bool removed_from_set = 
dependent_node->dependencies.erase(node); + chassert(removed_from_set); + } - nodes.erase(node->shared_from_this()); + auto it = nodes.find(node); + chassert(it != nodes.end()); + nodes.erase(it); + + nodes_sorted_by_level_lazy.clear(); } @@ -533,7 +592,7 @@ String TablesDependencyGraph::describeCyclicDependencies() const } -void TablesDependencyGraph::setNeedRecalculateLevels() +void TablesDependencyGraph::setNeedRecalculateLevels() const { levels_calculated = false; nodes_sorted_by_level_lazy.clear(); @@ -546,49 +605,73 @@ void TablesDependencyGraph::calculateLevels() const return; levels_calculated = true; + /// First find tables with no dependencies, add them to `nodes_sorted_by_level_lazy`. + /// Then remove those tables from the dependency graph (we imitate that removing by decrementing `num_dependencies_to_count`), + /// and find which tables have no dependencies now. + /// Repeat until we have tables with no dependencies. + /// In the end we expect all nodes from `nodes` to be added to `nodes_sorted_by_level_lazy`. + /// If some nodes are still not added to `nodes_sorted_by_level_lazy` in the end then there is a cyclic dependency. + /// Complexity: O(V + E) + nodes_sorted_by_level_lazy.clear(); nodes_sorted_by_level_lazy.reserve(nodes.size()); - - std::unordered_set nodes_to_process; - for (const auto & node_ptr : nodes) - nodes_to_process.emplace(node_ptr.get()); - size_t current_level = 0; - while (!nodes_to_process.empty()) + /// Find tables with no dependencies. + for (const auto & node_ptr : nodes) { - size_t old_num_sorted = nodes_sorted_by_level_lazy.size(); - - for (auto it = nodes_to_process.begin(); it != nodes_to_process.end();) + const Node * node = node_ptr.get(); + node->num_dependencies_to_count = node->dependencies.size(); + if (!node->num_dependencies_to_count) { - const auto * current_node = *(it++); - bool has_dependencies = false; - for (const auto * dependency : current_node->dependencies) - { - if (nodes_to_process.contains(dependency)) - has_dependencies = true; - } + node->level = current_level; + nodes_sorted_by_level_lazy.emplace_back(node); + } + } - if (!has_dependencies) + size_t num_nodes_without_dependencies = nodes_sorted_by_level_lazy.size(); + ++current_level; + + while (num_nodes_without_dependencies) + { + size_t begin = nodes_sorted_by_level_lazy.size() - num_nodes_without_dependencies; + size_t end = nodes_sorted_by_level_lazy.size(); + + /// Decrement number of dependencies for each dependent table. + for (size_t i = begin; i != end; ++i) + { + const Node * current_node = nodes_sorted_by_level_lazy[i]; + for (const Node * dependent_node : current_node->dependents) { - current_node->level = current_level; - nodes_sorted_by_level_lazy.emplace_back(current_node); + if (!dependent_node->num_dependencies_to_count) + throw Exception(ErrorCodes::LOGICAL_ERROR, "{}: Trying to decrement 0 dependencies counter for {}. It's a bug", name_for_logging, dependent_node->storage_id); + + if (!--dependent_node->num_dependencies_to_count) + { + dependent_node->level = current_level; + nodes_sorted_by_level_lazy.emplace_back(dependent_node); + } } } - if (nodes_sorted_by_level_lazy.size() == old_num_sorted) - break; - - for (size_t i = old_num_sorted; i != nodes_sorted_by_level_lazy.size(); ++i) - nodes_to_process.erase(nodes_sorted_by_level_lazy[i]); + if (nodes_sorted_by_level_lazy.size() > nodes.size()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "{}: Some tables were found more than once while passing through the dependency graph. 
It's a bug", name_for_logging); + num_nodes_without_dependencies = nodes_sorted_by_level_lazy.size() - end; ++current_level; } - for (const auto * node_with_cyclic_dependencies : nodes_to_process) + if (nodes_sorted_by_level_lazy.size() < nodes.size()) { - node_with_cyclic_dependencies->level = CYCLIC_LEVEL; - nodes_sorted_by_level_lazy.emplace_back(node_with_cyclic_dependencies); + for (const auto & node_ptr : nodes) + { + const Node * node = node_ptr.get(); + if (node->num_dependencies_to_count) + { + node->level = CYCLIC_LEVEL; + nodes_sorted_by_level_lazy.emplace_back(node); + } + } } } @@ -630,7 +713,7 @@ std::vector> TablesDependencyGraph::getTablesSortedByDepe void TablesDependencyGraph::log() const { - if (empty()) + if (nodes.empty()) { LOG_TEST(getLogger(), "No tables"); return; diff --git a/src/Databases/TablesDependencyGraph.h b/src/Databases/TablesDependencyGraph.h index 0d60857dea8..e5be59d1ee9 100644 --- a/src/Databases/TablesDependencyGraph.h +++ b/src/Databases/TablesDependencyGraph.h @@ -20,11 +20,11 @@ using TableNamesSet = std::unordered_set; /// /// This class is used to represent various types of table-table dependencies: /// 1. View dependencies: "source_table -> materialized_view". -/// Data inserted to a source table is also inserted to corresponding materialized views. +/// Data inserted to a source table is also inserted to corresponding materialized views. /// 2. Loading dependencies: specify in which order tables must be loaded during startup. -/// For example a dictionary should be loaded after it's source table and it's written in the graph as "dictionary -> source_table". +/// For example a dictionary should be loaded after it's source table and it's written in the graph as "dictionary -> source_table". /// 3. Referential dependencies: "table -> all tables mentioned in its definition". -/// Referential dependencies are checked to decide if it's safe to drop a table (it can be unsafe if the table is used by another table). +/// Referential dependencies are checked to decide if it's safe to drop a table (it can be unsafe if the table is used by another table). /// /// WARNING: This class doesn't have an embedded mutex, so it must be synchronized outside. class TablesDependencyGraph @@ -98,8 +98,8 @@ public: /// Cyclic dependencies are dependencies like "A->A" or "A->B->C->D->A". void checkNoCyclicDependencies() const; bool hasCyclicDependencies() const; - std::vector getTablesWithCyclicDependencies() const; String describeCyclicDependencies() const; + std::vector getTablesWithCyclicDependencies() const; /// Returns a list of tables sorted by their dependencies: /// tables without dependencies first, then @@ -113,8 +113,12 @@ public: /// Outputs information about this graph as a bunch of logging messages. void log() const; + /// Calculates levels - this is required for checking cyclic dependencies, to sort tables by dependency, and to log the graph. + /// This function is called automatically by the functions which need it, but can be invoked directly. + void calculateLevels() const; + private: - struct Node : public std::enable_shared_from_this + struct Node { StorageID storage_id; @@ -128,28 +132,38 @@ private: /// Calculated lazily. mutable size_t level = 0; + /// Number of dependencies left, used only while we're calculating levels. 
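+        /// (This is the Kahn-style topological sort described in calculateLevels(): the counter
+        /// starts at the number of the node's dependencies, and the node gets its level assigned
+        /// exactly when the counter drops to zero.)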
+        mutable size_t num_dependencies_to_count = 0;
+
         explicit Node(const StorageID & storage_id_) : storage_id(storage_id_) {}
     };

     using NodeSharedPtr = std::shared_ptr<Node>;

-    struct LessByLevel
+    struct Hash
     {
-        bool operator()(const Node * left, const Node * right) { return left->level < right->level; }
+        using is_transparent = void;
+        size_t operator()(const Node * node) const { return std::hash<const Node *>{}(node); }
+        size_t operator()(const NodeSharedPtr & node_ptr) const { return operator()(node_ptr.get()); }
     };

-    std::unordered_set<NodeSharedPtr> nodes;
+    struct Equal
+    {
+        using is_transparent = void;
+        bool operator()(const NodeSharedPtr & left, const Node * right) const { return left.get() == right; }
+        bool operator()(const NodeSharedPtr & left, const NodeSharedPtr & right) const { return left == right; }
+    };
+
+    std::unordered_set<NodeSharedPtr, Hash, Equal> nodes;

     /// Nodes can be found either by UUID or by database name & table name. That's why we need two maps here.
     std::unordered_map nodes_by_database_and_table_names;
     std::unordered_map<UUID, Node *> nodes_by_uuid;

-    /// This is set if both `level` inside each node and `nodes_sorted_by_level_lazy` are calculated.
-    mutable bool levels_calculated = false;
-
     /// Nodes sorted by their level. Calculated lazily.
     using NodesSortedByLevel = std::vector<const Node *>;
     mutable NodesSortedByLevel nodes_sorted_by_level_lazy;
+    mutable bool levels_calculated = false;

     const String name_for_logging;
     mutable Poco::Logger * logger = nullptr;
@@ -161,8 +175,7 @@ private:
     static std::vector<StorageID> getDependencies(const Node & node);
     static std::vector<StorageID> getDependents(const Node & node);

-    void setNeedRecalculateLevels();
-    void calculateLevels() const;
+    void setNeedRecalculateLevels() const;
     const NodesSortedByLevel & getNodesSortedByLevel() const;

     Poco::Logger * getLogger() const;

From 76ba8ab3d43da7c155431378cdf6f1ffcef16e81 Mon Sep 17 00:00:00 2001
From: Vitaly Baranov
Date: Sun, 4 Dec 2022 02:04:50 +0100
Subject: [PATCH 14/35] Add new tests.

---
 src/Backups/RestorerFromBackup.cpp            |  1 +
 .../test_backup_restore_new/test.py           | 40 +++++++++++++++++--
 .../01155_rename_move_materialized_view.sql   |  4 +-
 3 files changed, 40 insertions(+), 5 deletions(-)

diff --git a/src/Backups/RestorerFromBackup.cpp b/src/Backups/RestorerFromBackup.cpp
index 244a51669a1..67db239f31d 100644
--- a/src/Backups/RestorerFromBackup.cpp
+++ b/src/Backups/RestorerFromBackup.cpp
@@ -674,6 +674,7 @@ void RestorerFromBackup::removeUnresolvedDependencies()
 void RestorerFromBackup::createTables()
 {
     /// We need to create tables considering their dependencies.
+    tables_dependencies.log();
     auto tables_to_create = tables_dependencies.getTablesSortedByDependency();
     for (const auto & table_id : tables_to_create)
     {
diff --git a/tests/integration/test_backup_restore_new/test.py b/tests/integration/test_backup_restore_new/test.py
index 7eeabde1380..54dd9e41d4b 100644
--- a/tests/integration/test_backup_restore_new/test.py
+++ b/tests/integration/test_backup_restore_new/test.py
@@ -1180,6 +1180,8 @@ def test_tables_dependency():
     t4 = random_table_names[3]
     t5 = random_table_names[4]
     t6 = random_table_names[5]
+    t7 = random_table_names[6]
+    t8 = random_table_names[7]

     # Create a materialized view and a dictionary with a local table as source.
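    # (t1 and t2 are plain tables; t3 is a materialized view writing into t2; t4 is a
    # dictionary sourced from t1; t5 and t6 read from that dictionary; t7 is a view over
    # t6; t8 is a Buffer table flushing into t2.)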
 instance.query(
@@ -1193,7 +1195,7 @@ def test_tables_dependency():
     instance.query(f"CREATE MATERIALIZED VIEW {t3} TO {t2} AS SELECT x, y FROM {t1}")

     instance.query(
-        f"CREATE DICTIONARY {t4} (x Int64, y String) PRIMARY KEY x SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() TABLE '{t1.split('.')[1]}' DB '{t1.split('.')[0]}')) LAYOUT(FLAT()) LIFETIME(0)"
+        f"CREATE DICTIONARY {t4} (x Int64, y String) PRIMARY KEY x SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() TABLE '{t1.split('.')[1]}' DB '{t1.split('.')[0]}')) LAYOUT(FLAT()) LIFETIME(4)"
     )

     instance.query(f"CREATE TABLE {t5} AS dictionary({t4})")
@@ -1202,12 +1204,20 @@ def test_tables_dependency():
         f"CREATE TABLE {t6}(x Int64, y String DEFAULT dictGet({t4}, 'y', x)) ENGINE=MergeTree ORDER BY tuple()"
     )

+    instance.query(f"CREATE VIEW {t7} AS SELECT sum(x) FROM (SELECT x FROM {t6})")
+
+    instance.query(
+        f"CREATE TABLE {t8} AS {t2} ENGINE = Buffer({t2.split('.')[0]}, {t2.split('.')[1]}, 16, 10, 100, 10000, 1000000, 10000000, 100000000)"
+    )
+
     # Make backup.
     backup_name = new_backup_name()
     instance.query(f"BACKUP DATABASE test, DATABASE test2 TO {backup_name}")

     # Drop everything in reverse order.
     def drop():
+        instance.query(f"DROP TABLE {t8} NO DELAY")
+        instance.query(f"DROP TABLE {t7} NO DELAY")
         instance.query(f"DROP TABLE {t6} NO DELAY")
         instance.query(f"DROP TABLE {t5} NO DELAY")
         instance.query(f"DROP DICTIONARY {t4}")
@@ -1219,11 +1229,35 @@ def test_tables_dependency():

     drop()

-    # Restore everything and check.
+    # Restore everything.
     instance.query(f"RESTORE ALL FROM {backup_name}")

+    # Check everything is restored.
     assert instance.query(
         "SELECT concat(database, '.', name) AS c FROM system.tables WHERE database IN ['test', 'test2'] ORDER BY c"
-    ) == TSV(sorted([t1, t2, t3, t4, t5, t6]))
+    ) == TSV(sorted([t1, t2, t3, t4, t5, t6, t7, t8]))
+
+    # Check logs.
+    instance.query("SYSTEM FLUSH LOGS")
+    expect_in_logs = [
+        f"Table {t1} has no dependencies (level 0)",
+        f"Table {t2} has no dependencies (level 0)",
+        (
+            f"Table {t3} has 2 dependencies: {t1}, {t2} (level 1)",
+            f"Table {t3} has 2 dependencies: {t2}, {t1} (level 1)",
+        ),
+        f"Table {t4} has 1 dependencies: {t1} (level 1)",
+        f"Table {t5} has 1 dependencies: {t4} (level 2)",
+        f"Table {t6} has 1 dependencies: {t4} (level 2)",
+        f"Table {t7} has 1 dependencies: {t6} (level 3)",
+        f"Table {t8} has 1 dependencies: {t2} (level 1)",
+    ]
+    for expect in expect_in_logs:
+        assert any(
+            [
+                instance.contains_in_log(f"RestorerFromBackup: {x}")
+                for x in (expect if isinstance(expect, tuple) else (expect,))
+            ]
+        )

     drop()

diff --git a/tests/queries/0_stateless/01155_rename_move_materialized_view.sql b/tests/queries/0_stateless/01155_rename_move_materialized_view.sql
index c3cc0bbb9eb..1eff1c0779a 100644
--- a/tests/queries/0_stateless/01155_rename_move_materialized_view.sql
+++ b/tests/queries/0_stateless/01155_rename_move_materialized_view.sql
@@ -39,7 +39,7 @@ RENAME TABLE test_01155_ordinary.mv1 TO test_01155_atomic.mv1;
 RENAME TABLE test_01155_ordinary.mv2 TO test_01155_atomic.mv2;
 RENAME TABLE test_01155_ordinary.dst TO test_01155_atomic.dst;
 RENAME TABLE test_01155_ordinary.src TO test_01155_atomic.src;
-SET check_table_dependencies=0; -- Otherwise we'll get error "test_01155_atomic.dict depends on test_01155_ordinary.dist" in the next line.
+SET check_table_dependencies=0; -- Otherwise we'll get error "test_01155_ordinary.dict depends on test_01155_ordinary.dist" in the next line.
RENAME TABLE test_01155_ordinary.dist TO test_01155_atomic.dist; SET check_table_dependencies=1; RENAME DICTIONARY test_01155_ordinary.dict TO test_01155_atomic.dict; @@ -65,7 +65,7 @@ SELECT dictGet('test_01155_ordinary.dict', 'x', 'after renaming database'); SELECT database, substr(name, 1, 10) FROM system.tables WHERE database like 'test_01155_%'; -- Move tables back -SET check_table_dependencies=0; -- Otherwise we'll get error "test_01155_atomic.dict depends on test_01155_ordinary.dist" in the next line. +SET check_table_dependencies=0; -- Otherwise we'll get error "test_01155_ordinary.dict depends on test_01155_ordinary.dist" in the next line. RENAME DATABASE test_01155_ordinary TO test_01155_atomic; SET check_table_dependencies=1; From 0207637f6b14aff1016ddbf9e369fd98a6c3c8d2 Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Sun, 4 Dec 2022 12:33:06 +0100 Subject: [PATCH 15/35] Use query context instead of the global context in DDLDependencyVisitor. --- src/Backups/RestorerFromBackup.cpp | 2 +- src/Databases/DDLDependencyVisitor.cpp | 23 +++++++++++------------ src/Databases/DDLDependencyVisitor.h | 6 +++--- 3 files changed, 15 insertions(+), 16 deletions(-) diff --git a/src/Backups/RestorerFromBackup.cpp b/src/Backups/RestorerFromBackup.cpp index 67db239f31d..75ec2579f52 100644 --- a/src/Backups/RestorerFromBackup.cpp +++ b/src/Backups/RestorerFromBackup.cpp @@ -346,7 +346,7 @@ void RestorerFromBackup::findTableInBackup(const QualifiedTableName & table_name res_table_info.has_data = backup->hasFiles(data_path_in_backup); res_table_info.data_path_in_backup = data_path_in_backup; - tables_dependencies.addDependencies(table_name, getDependenciesFromCreateQuery(context->getGlobalContext(), table_name, create_table_query)); + tables_dependencies.addDependencies(table_name, getDependenciesFromCreateQuery(context, table_name, create_table_query)); if (partitions) { diff --git a/src/Databases/DDLDependencyVisitor.cpp b/src/Databases/DDLDependencyVisitor.cpp index 525f4fb7b12..a33fc0b6b27 100644 --- a/src/Databases/DDLDependencyVisitor.cpp +++ b/src/Databases/DDLDependencyVisitor.cpp @@ -23,7 +23,7 @@ namespace { /// TO target_table (for materialized views) if (to_table.database.empty()) - to_table.database = data.default_database; + to_table.database = data.current_database; data.dependencies.emplace(to_table); } @@ -32,7 +32,7 @@ namespace { /// AS table_name if (as_table.database.empty()) - as_table.database = data.default_database; + as_table.database = data.current_database; data.dependencies.emplace(as_table); } } @@ -60,7 +60,7 @@ namespace if (qualified_name.database.empty()) { /// It can be table/dictionary from default database or XML dictionary, but we cannot distinguish it here. - qualified_name.database = data.default_database; + qualified_name.database = data.current_database; } data.dependencies.emplace(qualified_name); @@ -112,7 +112,7 @@ namespace if (qualified_name.database.empty()) { /// It can be table/dictionary from default database or XML dictionary, but we cannot distinguish it here. 
-            qualified_name.database = data.default_database;
+            qualified_name.database = data.current_database;
         }
         data.dependencies.emplace(std::move(qualified_name));
     }
@@ -141,7 +141,7 @@ namespace
             return;

         if (qualified_name.database.empty())
-            qualified_name.database = data.default_database;
+            qualified_name.database = data.current_database;

         data.dependencies.emplace(qualified_name);
     }
@@ -181,27 +181,26 @@ namespace
         if (!dictionary.source || dictionary.source->name != "clickhouse" || !dictionary.source->elements)
             return;

-        auto config = getDictionaryConfigurationFromAST(data.create_query->as<ASTCreateQuery &>(), data.global_context);
-        auto info = getInfoIfClickHouseDictionarySource(config, data.global_context);
+        auto config = getDictionaryConfigurationFromAST(data.create_query->as<ASTCreateQuery &>(), data.context);
+        auto info = getInfoIfClickHouseDictionarySource(config, data.context);

         if (!info || !info->is_local)
             return;

         if (info->table_name.database.empty())
-            info->table_name.database = data.default_database;
+            info->table_name.database = data.current_database;
         data.dependencies.emplace(std::move(info->table_name));
     }
 }

-TableNamesSet getDependenciesFromCreateQuery(const ContextPtr & global_context, const QualifiedTableName & table_name, const ASTPtr & ast)
+TableNamesSet getDependenciesFromCreateQuery(const ContextPtr & context, const QualifiedTableName & table_name, const ASTPtr & ast)
 {
-    assert(global_context == global_context->getGlobalContext());
     DDLDependencyVisitor::Data data;
     data.table_name = table_name;
-    data.default_database = global_context->getCurrentDatabase();
+    data.current_database = context->getCurrentDatabase();
     data.create_query = ast;
-    data.global_context = global_context;
+    data.context = context;
     DDLDependencyVisitor::Visitor visitor{data};
     visitor.visit(ast);
     data.dependencies.erase(data.table_name);
diff --git a/src/Databases/DDLDependencyVisitor.h b/src/Databases/DDLDependencyVisitor.h
index 9709eeec9d3..5f56d0f9f5a 100644
--- a/src/Databases/DDLDependencyVisitor.h
+++ b/src/Databases/DDLDependencyVisitor.h
@@ -12,7 +12,7 @@ using TableNamesSet = std::unordered_set;
 /// Returns a list of all tables explicitly referenced in the create query of a specified table.
 /// For example, a column default expression can use dictGet() and thus reference a dictionary.
 /// Does not validate AST, works a best-effort way.
-TableNamesSet getDependenciesFromCreateQuery(const ContextPtr & global_context, const QualifiedTableName & table_name, const ASTPtr & ast);
+TableNamesSet getDependenciesFromCreateQuery(const ContextPtr & context, const QualifiedTableName & table_name, const ASTPtr & ast);

 /// Visits ASTCreateQuery and extracts the names of all tables explicitly referenced in the create query.
 class DDLDependencyVisitor

From 5aaff60650e2aaa420807a34f7a0b7a3a828c266 Mon Sep 17 00:00:00 2001
From: Vitaly Baranov
Date: Mon, 5 Dec 2022 19:37:41 +0100
Subject: [PATCH 16/35] Fix referential dependencies when host & port in a
 ClickHouse dictionary source are set by default.
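
Previously getInfoIfClickHouseDictionarySource() gave up when HOST or PORT was
missing from a CLICKHOUSE() dictionary source, so a dictionary defined, for
example (illustrative names), like

    CREATE DICTIONARY mydb.dict (x Int64, y String) PRIMARY KEY x
    SOURCE(CLICKHOUSE(TABLE 'src' DB 'mydb'))
    LAYOUT(FLAT()) LIFETIME(0)

was not recognized as depending on the local table mydb.src. Now the host
defaults to 'localhost' and the port to the server's own TCP port (or secure
TCP port), so such a source is treated as local and the referential
dependency is recorded.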
---
 .../getDictionaryConfigurationFromAST.cpp         | 11 ++++++-----
 tests/integration/test_backup_restore_new/test.py |  9 ++++++++-
 2 files changed, 14 insertions(+), 6 deletions(-)

diff --git a/src/Dictionaries/getDictionaryConfigurationFromAST.cpp b/src/Dictionaries/getDictionaryConfigurationFromAST.cpp
index 4868413dabd..6174cf5da46 100644
--- a/src/Dictionaries/getDictionaryConfigurationFromAST.cpp
+++ b/src/Dictionaries/getDictionaryConfigurationFromAST.cpp
@@ -623,20 +623,21 @@ getInfoIfClickHouseDictionarySource(DictionaryConfigurationPtr & config, Context
 {
     ClickHouseDictionarySourceInfo info;

-    String host = config->getString("dictionary.source.clickhouse.host", "");
-    UInt16 port = config->getUInt("dictionary.source.clickhouse.port", 0);
+    bool secure = config->getBool("dictionary.source.clickhouse.secure", false);
+    UInt16 default_port = secure ? global_context->getTCPPortSecure().value_or(0) : global_context->getTCPPort();
+
+    String host = config->getString("dictionary.source.clickhouse.host", "localhost");
+    UInt16 port = config->getUInt("dictionary.source.clickhouse.port", default_port);
     String database = config->getString("dictionary.source.clickhouse.db", "");
     String table = config->getString("dictionary.source.clickhouse.table", "");
-    bool secure = config->getBool("dictionary.source.clickhouse.secure", false);

-    if (host.empty() || port == 0 || table.empty())
+    if (table.empty())
         return {};

     info.table_name = {database, table};

     try
     {
-        UInt16 default_port = secure ? global_context->getTCPPortSecure().value_or(0) : global_context->getTCPPort();
         if (isLocalAddress({host, port}, default_port))
             info.is_local = true;
     }
diff --git a/tests/integration/test_backup_restore_new/test.py b/tests/integration/test_backup_restore_new/test.py
index 54dd9e41d4b..90e8acc702d 100644
--- a/tests/integration/test_backup_restore_new/test.py
+++ b/tests/integration/test_backup_restore_new/test.py
@@ -1182,6 +1182,7 @@ def test_tables_dependency():
     t6 = random_table_names[5]
     t7 = random_table_names[6]
     t8 = random_table_names[7]
+    t9 = random_table_names[8]

     # Create a materialized view and a dictionary with a local table as source.
     instance.query(
@@ -1210,12 +1211,17 @@ def test_tables_dependency():
         f"CREATE TABLE {t8} AS {t2} ENGINE = Buffer({t2.split('.')[0]}, {t2.split('.')[1]}, 16, 10, 100, 10000, 1000000, 10000000, 100000000)"
     )

+    instance.query(
+        f"CREATE DICTIONARY {t9} (x Int64, y String) PRIMARY KEY x SOURCE(CLICKHOUSE(TABLE '{t1.split('.')[1]}' DB '{t1.split('.')[0]}')) LAYOUT(FLAT()) LIFETIME(9)"
+    )
+
     # Make backup.
     backup_name = new_backup_name()
     instance.query(f"BACKUP DATABASE test, DATABASE test2 TO {backup_name}")

     # Drop everything in reverse order.
     def drop():
+        instance.query(f"DROP DICTIONARY {t9}")
         instance.query(f"DROP TABLE {t8} NO DELAY")
         instance.query(f"DROP TABLE {t7} NO DELAY")
         instance.query(f"DROP TABLE {t6} NO DELAY")
         instance.query(f"DROP TABLE {t5} NO DELAY")
         instance.query(f"DROP DICTIONARY {t4}")
@@ -1235,7 +1241,7 @@ def test_tables_dependency():

     # Restore everything.
     instance.query(f"RESTORE ALL FROM {backup_name}")

     # Check everything is restored.
     assert instance.query(
         "SELECT concat(database, '.', name) AS c FROM system.tables WHERE database IN ['test', 'test2'] ORDER BY c"
-    ) == TSV(sorted([t1, t2, t3, t4, t5, t6, t7, t8]))
+    ) == TSV(sorted([t1, t2, t3, t4, t5, t6, t7, t8, t9]))

     # Check logs.
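    # Flush logs first so that all messages written during the restore are
    # visible to contains_in_log() below.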
 instance.query("SYSTEM FLUSH LOGS")
@@ -1251,6 +1257,7 @@ def test_tables_dependency():
         f"Table {t6} has 1 dependencies: {t4} (level 2)",
         f"Table {t7} has 1 dependencies: {t6} (level 3)",
         f"Table {t8} has 1 dependencies: {t2} (level 1)",
+        f"Table {t9} has 1 dependencies: {t1} (level 1)",
     ]
     for expect in expect_in_logs:
         assert any(
             [

From 4f0d1c5e0f30ae0a3d27e064a5de323d4ca9d316 Mon Sep 17 00:00:00 2001
From: Vitaly Baranov
Date: Sat, 10 Dec 2022 23:50:00 +0100
Subject: [PATCH 17/35] Fix copying of query contexts for async backup/restore.

---
 src/Backups/BackupsWorker.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/Backups/BackupsWorker.cpp b/src/Backups/BackupsWorker.cpp
index 704562488b1..affcea94c57 100644
--- a/src/Backups/BackupsWorker.cpp
+++ b/src/Backups/BackupsWorker.cpp
@@ -181,6 +181,7 @@ OperationID BackupsWorker::startMakingBackup(const ASTPtr & query, const Context
         /// For ON CLUSTER queries we will need to change some settings.
         /// For ASYNC queries we have to clone the context anyway.
         context_in_use = mutable_context = Context::createCopy(context);
+        mutable_context->makeQueryContext();
     }

     if (backup_settings.async)
@@ -400,6 +401,7 @@ OperationID BackupsWorker::startRestoring(const ASTPtr & query, ContextMutablePt
         /// For ON CLUSTER queries we will need to change some settings.
         /// For ASYNC queries we have to clone the context anyway.
         context_in_use = Context::createCopy(context);
+        context_in_use->makeQueryContext();
     }

     if (restore_settings.async)

From efbf0f7398cedca213deb84d8c3d2720c9ae79a0 Mon Sep 17 00:00:00 2001
From: Vitaly Baranov
Date: Sat, 10 Dec 2022 23:57:14 +0100
Subject: [PATCH 18/35] Move DDLDependencyVisitor from header to cpp file.

---
 src/Databases/DDLDependencyVisitor.cpp | 438 ++++++++++++++-----------
 src/Databases/DDLDependencyVisitor.h   |  24 +-
 2 files changed, 255 insertions(+), 207 deletions(-)

diff --git a/src/Databases/DDLDependencyVisitor.cpp b/src/Databases/DDLDependencyVisitor.cpp
index a33fc0b6b27..a6f7bda11e2 100644
--- a/src/Databases/DDLDependencyVisitor.cpp
+++ b/src/Databases/DDLDependencyVisitor.cpp
@@ -1,6 +1,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -15,224 +16,289 @@ namespace DB

 namespace
 {
-    /// CREATE TABLE or CREATE DICTIONARY or CREATE VIEW or CREATE TEMPORARY TABLE or CREATE DATABASE query.
-    void visitCreateQuery(const ASTCreateQuery & create, DDLDependencyVisitor::Data & data)
+    /// Data for DDLDependencyVisitor.
+    /// Used to visit ASTCreateQuery and extract the names of all tables explicitly referenced in the create query.
+    class DDLDependencyVisitorData
     {
-        QualifiedTableName to_table{create.to_table_id.database_name, create.to_table_id.table_name};
-        if (!to_table.table.empty())
+    public:
+        DDLDependencyVisitorData(const ContextPtr & context_, const QualifiedTableName & table_name_, const ASTPtr & ast_)
+            : create_query(ast_), table_name(table_name_), current_database(context_->getCurrentDatabase()), context(context_)
         {
-            /// TO target_table (for materialized views)
-            if (to_table.database.empty())
-                to_table.database = data.current_database;
-            data.dependencies.emplace(to_table);
         }

-        QualifiedTableName as_table{create.as_database, create.as_table};
-        if (!as_table.table.empty())
+        /// Acquire the result of visiting the create query.
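+        /// This accessor is rvalue-qualified (note the `&&` below): it moves `dependencies`
+        /// out, so it is meant to be called exactly once on an expiring object, e.g.
+        /// std::move(data).getDependencies().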
+        TableNamesSet getDependencies() &&
         {
-            /// TO target_table (for materialized views)
-            if (to_table.database.empty())
-                to_table.database = data.current_database;
-            data.dependencies.emplace(to_table);
+            dependencies.erase(table_name);
+            return std::move(dependencies);
         }
-    }
-
-    /// ASTTableExpression represents a reference to a table in SELECT query.
-    /// DDLDependencyVisitor should handle ASTTableExpression because some CREATE queries can contain SELECT queries after AS
-    /// (for example, CREATE VIEW).
-    void visitTableExpression(const ASTTableExpression & expr, DDLDependencyVisitor::Data & data)
-    {
-        if (!expr.database_and_table_name)
-            return;
+        bool needChildVisit(const ASTPtr & child) const { return !skip_asts.contains(child.get()); }

-        const ASTIdentifier * identifier = dynamic_cast<const ASTIdentifier *>(expr.database_and_table_name.get());
-        if (!identifier)
-            return;
-
-        auto table_identifier = identifier->createTable();
-        if (!table_identifier)
-            return;
-
-        QualifiedTableName qualified_name{table_identifier->getDatabaseName(), table_identifier->shortName()};
-        if (qualified_name.table.empty())
-            return;
-
-        if (qualified_name.database.empty())
+        void visit(const ASTPtr & ast)
         {
-            /// It can be table/dictionary from default database or XML dictionary, but we cannot distinguish it here.
-            qualified_name.database = data.current_database;
+            if (auto * create = ast->as<ASTCreateQuery>())
+            {
+                visitCreateQuery(*create);
+            }
+            else if (auto * dictionary = ast->as<ASTDictionary>())
+            {
+                visitDictionaryDef(*dictionary);
+            }
+            else if (auto * expr = ast->as<ASTTableExpression>())
+            {
+                visitTableExpression(*expr);
+            }
+            else if (const auto * function = ast->as<ASTFunction>())
+            {
+                if (function->kind == ASTFunction::Kind::TABLE_ENGINE)
+                    visitTableEngine(*function);
+                else
+                    visitFunction(*function);
+            }
+        }
+
+    private:
+        ASTPtr create_query;
+        std::unordered_set<const IAST *> skip_asts;
+        QualifiedTableName table_name;
+        String current_database;
+        ContextPtr context;
+        TableNamesSet dependencies;
+
+        /// CREATE TABLE or CREATE DICTIONARY or CREATE VIEW or CREATE TEMPORARY TABLE or CREATE DATABASE query.
+        void visitCreateQuery(const ASTCreateQuery & create)
+        {
+            QualifiedTableName to_table{create.to_table_id.database_name, create.to_table_id.table_name};
+            if (!to_table.table.empty())
+            {
+                /// TO target_table (for materialized views)
+                if (to_table.database.empty())
+                    to_table.database = current_database;
+                dependencies.emplace(to_table);
+            }
+
+            QualifiedTableName as_table{create.as_database, create.as_table};
+            if (!as_table.table.empty())
+            {
+                /// AS table_name
+                if (as_table.database.empty())
+                    as_table.database = current_database;
+                dependencies.emplace(as_table);
+            }
+        }
+
+        /// The definition of a dictionary: SOURCE(CLICKHOUSE(...)) LAYOUT(...) LIFETIME(...)
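+        /// For example, a dictionary created with
+        ///     CREATE DICTIONARY d (x Int64, y String) PRIMARY KEY x
+        ///     SOURCE(CLICKHOUSE(DB 'mydb' TABLE 'src')) LAYOUT(FLAT()) LIFETIME(0)
+        /// should produce a dependency on mydb.src (only local sources are considered).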
+        void visitDictionaryDef(const ASTDictionary & dictionary)
+        {
+            if (!dictionary.source || dictionary.source->name != "clickhouse" || !dictionary.source->elements)
                 return;
-            auto maybe_qualified_name = QualifiedTableName::tryParseFromString(literal->value.get<String>());
-            /// Just return if name if invalid
-            if (!maybe_qualified_name)
+            auto config = getDictionaryConfigurationFromAST(create_query->as<ASTCreateQuery &>(), context);
+            auto info = getInfoIfClickHouseDictionarySource(config, context);
+
+            /// We consider only dependencies on local tables.
+            if (!info || !info->is_local)
                 return;

-            qualified_name = std::move(*maybe_qualified_name);
+            if (info->table_name.database.empty())
+                info->table_name.database = current_database;
+            dependencies.emplace(std::move(info->table_name));
         }
-        else if (const auto * identifier = dynamic_cast<const ASTIdentifier *>(arg))
+
+        /// ASTTableExpression represents a reference to a table in SELECT query.
+        /// DDLDependencyVisitor should handle ASTTableExpression because some CREATE queries can contain SELECT queries after AS
+        /// (for example, CREATE VIEW).
+        void visitTableExpression(const ASTTableExpression & expr)
         {
-            /// ASTIdentifier or ASTTableIdentifier
+            if (!expr.database_and_table_name)
+                return;
+
+            const ASTIdentifier * identifier = dynamic_cast<const ASTIdentifier *>(expr.database_and_table_name.get());
+            if (!identifier)
+                return;
+
             auto table_identifier = identifier->createTable();
-            /// Just return if table identified is invalid
             if (!table_identifier)
                 return;

-            qualified_name.database = table_identifier->getDatabaseName();
-            qualified_name.table = table_identifier->shortName();
-        }
-        else
-        {
-            /// Just return because we don't validate AST in this function.
-            return;
+            QualifiedTableName qualified_name{table_identifier->getDatabaseName(), table_identifier->shortName()};
+            if (qualified_name.table.empty())
+                return;
+
+            if (qualified_name.database.empty())
+            {
+                /// It can be table/dictionary from default database or XML dictionary, but we cannot distinguish it here.
+                qualified_name.database = current_database;
+            }
+
+            dependencies.emplace(qualified_name);
         }

-        if (qualified_name.database.empty())
+        /// Finds dependencies of a table engine.
+        void visitTableEngine(const ASTFunction & table_engine)
         {
-            /// It can be table/dictionary from default database or XML dictionary, but we cannot distinguish it here.
-            qualified_name.database = data.current_database;
-        }
-        data.dependencies.emplace(std::move(qualified_name));
-    }
+            /// Dictionary(db_name.dictionary_name)
+            if (table_engine.name == "Dictionary")
+                addQualifiedNameFromArgument(table_engine, 0);

-    /// Extracts a table name with database written in the form 'db_name', 'table_name' (two strings).
-    void extractDatabaseAndTableNameFromArguments(const ASTFunction & function, DDLDependencyVisitor::Data & data, size_t database_arg_idx, size_t table_arg_idx)
+            /// Buffer('db_name', 'dest_table_name')
+            if (table_engine.name == "Buffer")
+                addDatabaseAndTableNameFromArguments(table_engine, 0, 1);
+        }
+
+        /// Finds dependencies of a function.
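+        /// For example, dictGet('mydb.dict', 'y', x) references mydb.dict and
+        /// in(x, mydb.t) references mydb.t; which argument names the table differs
+        /// per function, see below.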
+        void visitFunction(const ASTFunction & function)
+        {
+            if (function.name == "joinGet" || function.name == "dictHas" || function.name == "dictIsIn" || function.name.starts_with("dictGet"))
+            {
+                /// dictGet('dict_name', attr_names, id_expr)
+                /// dictHas('dict_name', id_expr)
+                /// joinGet(join_storage_table_name, `value_column`, join_keys)
+                addQualifiedNameFromArgument(function, 0);
+            }
+            else if (function.name == "in" || function.name == "notIn" || function.name == "globalIn" || function.name == "globalNotIn")
+            {
+                /// in(x, table_name) - function for evaluating (x IN table_name)
+                addQualifiedNameFromArgument(function, 1);
+            }
+            else if (function.name == "dictionary")
+            {
+                /// dictionary(dict_name)
+                addQualifiedNameFromArgument(function, 0);
+            }
+        }
+
+        /// Gets an argument as a string, evaluates constants if necessary.
+        std::optional<String> tryGetStringFromArgument(const ASTFunction & function, size_t arg_idx) const
+        {
+            if (!function.arguments)
+                return {};
+
+            const ASTs & args = function.arguments->children;
+            if (arg_idx >= args.size())
+                return {};
+
+            const auto & arg = args[arg_idx];
+
+            if (const auto * literal = arg->as<ASTLiteral>())
+            {
+                if (literal->value.getType() != Field::Types::String)
+                    return {};
+                return literal->value.safeGet<String>();
+            }
+            else if (const auto * identifier = dynamic_cast<const ASTIdentifier *>(arg.get()))
+            {
+                return identifier->name();
+            }
+            else
+            {
+                return {};
+            }
+        }
+
+        /// Gets an argument as a qualified table name.
+        /// Accepts forms db_name.table_name (as an identifier) and 'db_name.table_name' (as a string).
+        /// Replaces an empty database name with the current database unless `apply_current_database` is false.
+        std::optional<QualifiedTableName>
+        tryGetQualifiedNameFromArgument(const ASTFunction & function, size_t arg_idx, bool apply_current_database = true) const
+        {
+            if (!function.arguments)
+                return {};
+
+            const ASTs & args = function.arguments->children;
+            if (arg_idx >= args.size())
+                return {};
+
+            const auto & arg = args[arg_idx];
+            QualifiedTableName qualified_name;
+
+            if (const auto * identifier = dynamic_cast<const ASTIdentifier *>(arg.get()))
+            {
+                /// ASTIdentifier or ASTTableIdentifier
+                auto table_identifier = identifier->createTable();
+                if (!table_identifier)
+                    return {};
+
+                qualified_name.database = table_identifier->getDatabaseName();
+                qualified_name.table = table_identifier->shortName();
+            }
+            else
+            {
+                auto qualified_name_as_string = tryGetStringFromArgument(function, arg_idx);
+                if (!qualified_name_as_string)
+                    return {};
+
+                auto maybe_qualified_name = QualifiedTableName::tryParseFromString(*qualified_name_as_string);
+                if (!maybe_qualified_name)
+                    return {};
+
+                qualified_name = std::move(maybe_qualified_name).value();
+            }
+
+            if (qualified_name.database.empty() && apply_current_database)
+                qualified_name.database = current_database;
+
+            return qualified_name;
+        }
+
+        /// Adds a qualified table name from an argument to the collection of dependencies.
+        /// Accepts forms db_name.table_name (as an identifier) and 'db_name.table_name' (as a string).
+        void addQualifiedNameFromArgument(const ASTFunction & function, size_t arg_idx)
+        {
+            if (auto qualified_name = tryGetQualifiedNameFromArgument(function, arg_idx))
+                dependencies.emplace(std::move(qualified_name).value());
+        }
+
+        /// Returns a database name and a table name extracted from two separate arguments.
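+        /// For example, for Buffer('mydb', 'dst', ...) the arguments at indexes 0 and 1
+        /// yield the qualified name mydb.dst.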
+ std::optional tryGetDatabaseAndTableNameFromArguments( + const ASTFunction & function, size_t database_arg_idx, size_t table_arg_idx, bool apply_current_database = true) const + { + auto database = tryGetStringFromArgument(function, database_arg_idx); + if (!database) + return {}; + + auto table = tryGetStringFromArgument(function, table_arg_idx); + if (!table) + return {}; + + QualifiedTableName qualified_name; + qualified_name.database = std::move(database).value(); + qualified_name.table = std::move(table).value(); + + if (qualified_name.database.empty() && apply_current_database) + qualified_name.database = current_database; + + return qualified_name; + } + + /// Adds a database name and a table name from two separate arguments to the collection of dependencies. + void addDatabaseAndTableNameFromArguments(const ASTFunction & function, size_t database_arg_idx, size_t table_arg_idx) + { + if (auto qualified_name = tryGetDatabaseAndTableNameFromArguments(function, database_arg_idx, table_arg_idx)) + dependencies.emplace(std::move(qualified_name).value()); + } + }; + + /// Visits ASTCreateQuery and extracts the names of all tables explicitly referenced in the create query. + class DDLDependencyVisitor { - /// Just ignore incorrect arguments, proper exception will be thrown later - if (!function.arguments || (function.arguments->children.size() <= database_arg_idx) - || (function.arguments->children.size() <= table_arg_idx)) - return; + public: + using Data = DDLDependencyVisitorData; + using Visitor = ConstInDepthNodeVisitor; - const auto * expr_list = function.arguments->as(); - if (!expr_list) - return; - - const auto * database_literal = expr_list->children[database_arg_idx]->as(); - const auto * table_name_literal = expr_list->children[table_arg_idx]->as(); - - if (!database_literal || !table_name_literal || (database_literal->value.getType() != Field::Types::String) - || (table_name_literal->value.getType() != Field::Types::String)) - return; - - QualifiedTableName qualified_name{database_literal->value.get(), table_name_literal->value.get()}; - if (qualified_name.table.empty()) - return; - - if (qualified_name.database.empty()) - qualified_name.database = data.current_database; - - data.dependencies.emplace(qualified_name); - } - - void visitFunction(const ASTFunction & function, DDLDependencyVisitor::Data & data) - { - if (function.name == "joinGet" || function.name == "dictHas" || function.name == "dictIsIn" || function.name.starts_with("dictGet")) - { - /// dictGet('dict_name', attr_names, id_expr) - /// dictHas('dict_name', id_expr) - /// joinGet(join_storage_table_name, `value_column`, join_keys) - extractQualifiedTableNameFromArgument(function, data, 0); - } - else if (function.name == "in" || function.name == "notIn" || function.name == "globalIn" || function.name == "globalNotIn") - { - /// in(x, table_name) - function for evaluating (x IN table_name) - extractQualifiedTableNameFromArgument(function, data, 1); - } - else if (function.name == "dictionary") - { - /// dictionary(dict_name) - extractQualifiedTableNameFromArgument(function, data, 0); - } - } - - void visitTableEngine(const ASTFunction & table_engine, DDLDependencyVisitor::Data & data) - { - if (table_engine.name == "Dictionary") - extractQualifiedTableNameFromArgument(table_engine, data, 0); - - if (table_engine.name == "Buffer") - extractDatabaseAndTableNameFromArguments(table_engine, data, 0, 1); - } - - void visitDictionaryDef(const ASTDictionary & dictionary, DDLDependencyVisitor::Data & data) - { - if 
(!dictionary.source || dictionary.source->name != "clickhouse" || !dictionary.source->elements) - return; - - auto config = getDictionaryConfigurationFromAST(data.create_query->as(), data.context); - auto info = getInfoIfClickHouseDictionarySource(config, data.context); - - if (!info || !info->is_local) - return; - - if (info->table_name.database.empty()) - info->table_name.database = data.current_database; - data.dependencies.emplace(std::move(info->table_name)); - } + static bool needChildVisit(const ASTPtr &, const ASTPtr & child, const Data & data) { return data.needChildVisit(child); } + static void visit(const ASTPtr & ast, Data & data) { data.visit(ast); } + }; } TableNamesSet getDependenciesFromCreateQuery(const ContextPtr & context, const QualifiedTableName & table_name, const ASTPtr & ast) { - DDLDependencyVisitor::Data data; - data.table_name = table_name; - data.current_database = context->getCurrentDatabase(); - data.create_query = ast; - data.context = context; + DDLDependencyVisitor::Data data{context, table_name, ast}; DDLDependencyVisitor::Visitor visitor{data}; visitor.visit(ast); - data.dependencies.erase(data.table_name); - return data.dependencies; -} - -void DDLDependencyVisitor::visit(const ASTPtr & ast, Data & data) -{ - if (auto * create = ast->as()) - { - visitCreateQuery(*create, data); - } - else if (auto * dictionary = ast->as()) - { - visitDictionaryDef(*dictionary, data); - } - else if (auto * expr = ast->as()) - { - visitTableExpression(*expr, data); - } - else if (const auto * function = ast->as()) - { - if (function->kind == ASTFunction::Kind::TABLE_ENGINE) - visitTableEngine(*function, data); - else - visitFunction(*function, data); - } -} - -bool DDLDependencyVisitor::needChildVisit(const ASTPtr &, const ASTPtr &) -{ - return true; + return std::move(data).getDependencies(); } } diff --git a/src/Databases/DDLDependencyVisitor.h b/src/Databases/DDLDependencyVisitor.h index 5f56d0f9f5a..29ea6298b04 100644 --- a/src/Databases/DDLDependencyVisitor.h +++ b/src/Databases/DDLDependencyVisitor.h @@ -1,8 +1,9 @@ #pragma once -#include -#include #include +#include +#include +#include namespace DB @@ -14,23 +15,4 @@ using TableNamesSet = std::unordered_set; /// Does not validate AST, works a best-effort way. TableNamesSet getDependenciesFromCreateQuery(const ContextPtr & context, const QualifiedTableName & table_name, const ASTPtr & ast); -/// Visits ASTCreateQuery and extracts the names of all tables explicitly referenced in the create query. -class DDLDependencyVisitor -{ -public: - struct Data - { - ASTPtr create_query; - QualifiedTableName table_name; - String current_database; - ContextPtr context; - TableNamesSet dependencies; - }; - - using Visitor = ConstInDepthNodeVisitor; - - static void visit(const ASTPtr & ast, Data & data); - static bool needChildVisit(const ASTPtr & node, const ASTPtr & child); -}; - } From fc0f29821b48fc35d29290a9816c2f2ff2d48465 Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Sun, 11 Dec 2022 00:36:19 +0100 Subject: [PATCH 19/35] Use constant evaluation when calculating referential dependencies. 
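
tryGetStringFromArgument() now folds its argument with
evaluateConstantExpressionOrIdentifierAsLiteral() instead of accepting only a
plain string literal or a bare identifier. As an illustrative case (not from
the tests): a column default like

    dictGet(concat('mydb', '.dict'), 'y', x)

should now yield a dependency on mydb.dict, because the first argument
evaluates to a constant string.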
---
 src/Databases/DDLDependencyVisitor.cpp | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/src/Databases/DDLDependencyVisitor.cpp b/src/Databases/DDLDependencyVisitor.cpp
index a6f7bda11e2..50130b6c89a 100644
--- a/src/Databases/DDLDependencyVisitor.cpp
+++ b/src/Databases/DDLDependencyVisitor.cpp
@@ -2,6 +2,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -180,21 +181,21 @@ namespace
                 return {};

             const auto & arg = args[arg_idx];
+            ASTPtr evaluated;

-            if (const auto * literal = arg->as<ASTLiteral>())
+            try
             {
-                if (literal->value.getType() != Field::Types::String)
-                    return {};
-                return literal->value.safeGet<String>();
+                evaluated = evaluateConstantExpressionOrIdentifierAsLiteral(arg, context);
             }
-            else if (const auto * identifier = dynamic_cast<const ASTIdentifier *>(arg.get()))
+            catch (...)
             {
-                return identifier->name();
-            }
-            else
-            {
-                return {};
+                return {};
             }
+
+            const auto * literal = evaluated->as<ASTLiteral>();
+            if (!literal || (literal->value.getType() != Field::Types::String))
+                return {};
+            return literal->value.safeGet<String>();

From 659a79e53f35a66d7c1cfe9da8f6c86a54922a0d Mon Sep 17 00:00:00 2001
From: Anton Popov
Date: Mon, 12 Dec 2022 17:29:37 +0000
Subject: [PATCH 20/35] fix LOGICAL_ERROR if fetch of projection was stopped

---
 src/Storages/MergeTree/DataPartsExchange.cpp  | 83 ++++++++++++-------
 ...ero_copy_projection_cancel_fetch.reference |  2 +
 ...02494_zero_copy_projection_cancel_fetch.sh | 66 +++++++++++++++
 3 files changed, 122 insertions(+), 29 deletions(-)
 create mode 100644 tests/queries/0_stateless/02494_zero_copy_projection_cancel_fetch.reference
 create mode 100755 tests/queries/0_stateless/02494_zero_copy_projection_cancel_fetch.sh

diff --git a/src/Storages/MergeTree/DataPartsExchange.cpp b/src/Storages/MergeTree/DataPartsExchange.cpp
index 4f9c9ffd596..66f91aa6cd2 100644
--- a/src/Storages/MergeTree/DataPartsExchange.cpp
+++ b/src/Storages/MergeTree/DataPartsExchange.cpp
@@ -794,8 +794,6 @@ void Fetcher::downloadBasePartOrProjectionPartToDiskRemoteMeta(
         /// NOTE The is_cancelled flag also makes sense to check every time you read over the network,
         /// performing a poll with a not very large timeout.
         /// And now we check it only between read chunks (in the `copyData` function).
-        data_part_storage->removeSharedRecursive(true);
-        data_part_storage->commitTransaction();
         throw Exception("Fetching of part was cancelled", ErrorCodes::ABORTED);
     }

@@ -855,7 +853,6 @@ void Fetcher::downloadBaseOrProjectionPartToDisk(
         /// NOTE The is_cancelled flag also makes sense to check every time you read over the network,
         /// performing a poll with a not very large timeout.
         /// And now we check it only between read chunks (in the `copyData` function).
- data_part_storage->removeRecursive(); throw Exception("Fetching of part was cancelled", ErrorCodes::ABORTED); } @@ -934,22 +931,36 @@ MergeTreeData::MutableDataPartPtr Fetcher::downloadPartToDisk( CurrentMetrics::Increment metric_increment{CurrentMetrics::ReplicatedFetch}; - for (size_t i = 0; i < projections; ++i) + try { - String projection_name; - readStringBinary(projection_name, in); - MergeTreeData::DataPart::Checksums projection_checksum; + for (size_t i = 0; i < projections; ++i) + { + String projection_name; + readStringBinary(projection_name, in); + MergeTreeData::DataPart::Checksums projection_checksum; - auto projection_part_storage = data_part_storage->getProjection(projection_name + ".proj"); - projection_part_storage->createDirectories(); - downloadBaseOrProjectionPartToDisk( - replica_path, projection_part_storage, sync, in, projection_checksum, throttler); - checksums.addFile( - projection_name + ".proj", projection_checksum.getTotalSizeOnDisk(), projection_checksum.getTotalChecksumUInt128()); + auto projection_part_storage = data_part_storage->getProjection(projection_name + ".proj"); + projection_part_storage->createDirectories(); + downloadBaseOrProjectionPartToDisk( + replica_path, projection_part_storage, sync, in, projection_checksum, throttler); + checksums.addFile( + projection_name + ".proj", projection_checksum.getTotalSizeOnDisk(), projection_checksum.getTotalChecksumUInt128()); + } + + // Download the base part + downloadBaseOrProjectionPartToDisk(replica_path, data_part_storage, sync, in, checksums, throttler); + } + catch (const Exception & e) + { + /// Remove the whole part directory if fetch of base + /// part or fetch of any projection was stopped. + if (e.code() == ErrorCodes::ABORTED) + { + data_part_storage->removeRecursive(); + data_part_storage->commitTransaction(); + } + throw; } - - // Download the base part - downloadBaseOrProjectionPartToDisk(replica_path, data_part_storage, sync, in, checksums, throttler); assertEOF(in); data_part_storage->commitTransaction(); @@ -1007,23 +1018,37 @@ MergeTreeData::MutableDataPartPtr Fetcher::downloadPartToDiskRemoteMeta( data_part_storage->createDirectories(); - for (size_t i = 0; i < projections; ++i) + try { - String projection_name; - readStringBinary(projection_name, in); - MergeTreeData::DataPart::Checksums projection_checksum; + for (size_t i = 0; i < projections; ++i) + { + String projection_name; + readStringBinary(projection_name, in); + MergeTreeData::DataPart::Checksums projection_checksum; + + auto projection_part_storage = data_part_storage->getProjection(projection_name + ".proj"); + projection_part_storage->createDirectories(); + downloadBasePartOrProjectionPartToDiskRemoteMeta( + replica_path, projection_part_storage, in, projection_checksum, throttler); + + checksums.addFile( + projection_name + ".proj", projection_checksum.getTotalSizeOnDisk(), projection_checksum.getTotalChecksumUInt128()); + } - auto projection_part_storage = data_part_storage->getProjection(projection_name + ".proj"); - projection_part_storage->createDirectories(); downloadBasePartOrProjectionPartToDiskRemoteMeta( - replica_path, projection_part_storage, in, projection_checksum, throttler); - - checksums.addFile( - projection_name + ".proj", projection_checksum.getTotalSizeOnDisk(), projection_checksum.getTotalChecksumUInt128()); + replica_path, data_part_storage, in, checksums, throttler); + } + catch (const Exception & e) + { + if (e.code() == ErrorCodes::ABORTED) + { + /// Remove the whole part directory if fetch of 
base + /// part or fetch of any projection was stopped. + data_part_storage->removeSharedRecursive(true); + data_part_storage->commitTransaction(); + } + throw; } - - downloadBasePartOrProjectionPartToDiskRemoteMeta( - replica_path, data_part_storage, in, checksums, throttler); assertEOF(in); MergeTreeData::MutableDataPartPtr new_data_part; diff --git a/tests/queries/0_stateless/02494_zero_copy_projection_cancel_fetch.reference b/tests/queries/0_stateless/02494_zero_copy_projection_cancel_fetch.reference new file mode 100644 index 00000000000..5878ba47225 --- /dev/null +++ b/tests/queries/0_stateless/02494_zero_copy_projection_cancel_fetch.reference @@ -0,0 +1,2 @@ +1000 +1000 diff --git a/tests/queries/0_stateless/02494_zero_copy_projection_cancel_fetch.sh b/tests/queries/0_stateless/02494_zero_copy_projection_cancel_fetch.sh new file mode 100755 index 00000000000..b72c3eb56c7 --- /dev/null +++ b/tests/queries/0_stateless/02494_zero_copy_projection_cancel_fetch.sh @@ -0,0 +1,66 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT -n --query " +DROP TABLE IF EXISTS wikistat1 SYNC; +DROP TABLE IF EXISTS wikistat2 SYNC; +" + +for i in {1..2}; do + $CLICKHOUSE_CLIENT --query " + CREATE TABLE wikistat$i + ( + time DateTime, + project LowCardinality(String), + subproject LowCardinality(String), + path String, + hits UInt64, + PROJECTION total + ( + SELECT + project, + subproject, + path, + sum(hits), + count() + GROUP BY + project, + subproject, + path + ) + ) + ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/02494_zero_copy_projection_cancel_fetch', '$i') + ORDER BY (path, time) + SETTINGS min_bytes_for_wide_part = 0, storage_policy = 's3_cache', + allow_remote_fs_zero_copy_replication = 1, + max_replicated_fetches_network_bandwidth = 100 + " +done + +$CLICKHOUSE_CLIENT --query "SYSTEM STOP FETCHES wikistat2" +$CLICKHOUSE_CLIENT --query "INSERT INTO wikistat1 SELECT toDateTime('2020-10-01 00:00:00'), 'hello', 'world', '/data/path', 10 from numbers(1000)" + +$CLICKHOUSE_CLIENT --query "SYSTEM START FETCHES wikistat2" +$CLICKHOUSE_CLIENT --query "SYSTEM SYNC REPLICA wikistat2" & + +# With previous versions LOGICAL_ERROR will be thrown +# and server will be crashed in debug mode. +sleep 1.5 +$CLICKHOUSE_CLIENT --query "SYSTEM STOP FETCHES wikistat2" +sleep 1.5 + +$CLICKHOUSE_CLIENT --query "ALTER TABLE wikistat2 MODIFY SETTING max_replicated_fetches_network_bandwidth = 0" +$CLICKHOUSE_CLIENT --query "SYSTEM START FETCHES wikistat2" +wait + +$CLICKHOUSE_CLIENT --query "SELECT count() FROM wikistat1 WHERE NOT ignore(*)" +$CLICKHOUSE_CLIENT --query "SELECT count() FROM wikistat2 WHERE NOT ignore(*)" + +$CLICKHOUSE_CLIENT -n --query " +DROP TABLE IF EXISTS wikistat1 SYNC; +DROP TABLE IF EXISTS wikistat2 SYNC; +" From 5c1f490b3a2fdcca85e5786c3347f92c1ee61b9c Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Sun, 11 Dec 2022 00:43:42 +0100 Subject: [PATCH 21/35] Implement referential dependencies for table engine "Distributed" and for functions cluster() and clusterAllReplicas(). 
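
With this change a table like

    CREATE TABLE dist (x Int64) ENGINE = Distributed('cluster', 'mydb', 'src')

gets a referential dependency on mydb.src, and the same applies to cluster()
and clusterAllReplicas() used in a table definition, but only when the named
cluster contains a local replica; for purely remote clusters no dependency is
recorded and a table function argument is not inspected at all.
remote()/remoteSecure() are handled conservatively: the address pattern is
assumed not to contain the local host.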
--- src/Databases/DDLDependencyVisitor.cpp | 115 ++++++++++++++++++ src/Interpreters/getClusterName.cpp | 19 ++- src/Interpreters/getClusterName.h | 1 + .../test_backup_restore_on_cluster/test.py | 78 ++++++++++++ 4 files changed, 210 insertions(+), 3 deletions(-) diff --git a/src/Databases/DDLDependencyVisitor.cpp b/src/Databases/DDLDependencyVisitor.cpp index 50130b6c89a..f0137e5bd60 100644 --- a/src/Databases/DDLDependencyVisitor.cpp +++ b/src/Databases/DDLDependencyVisitor.cpp @@ -1,14 +1,17 @@ #include #include +#include #include #include #include +#include #include #include #include #include #include #include +#include #include @@ -146,6 +149,27 @@ namespace /// Buffer('db_name', 'dest_table_name') if (table_engine.name == "Buffer") addDatabaseAndTableNameFromArguments(table_engine, 0, 1); + + /// Distributed(cluster_name, db_name, table_name, ...) + if (table_engine.name == "Distributed") + visitDistributedTableEngine(table_engine); + } + + /// Distributed(cluster_name, database_name, table_name, ...) + void visitDistributedTableEngine(const ASTFunction & table_engine) + { + /// We consider only dependencies on local tables. + bool has_local_replicas = false; + + if (auto cluster_name = tryGetClusterNameFromArgument(table_engine, 0)) + { + auto cluster = context->tryGetCluster(*cluster_name); + if (cluster && cluster->getLocalShardCount()) + has_local_replicas = true; + } + + if (has_local_replicas) + addDatabaseAndTableNameFromArguments(table_engine, 1, 2); } /// Finds dependencies of a function. @@ -168,6 +192,81 @@ namespace /// dictionary(dict_name) addQualifiedNameFromArgument(function, 0); } + else if (function.name == "remote" || function.name == "remoteSecure") + { + visitRemoteFunction(function, /* is_cluster_function= */ false); + } + else if (function.name == "cluster" || function.name == "clusterAllReplicas") + { + visitRemoteFunction(function, /* is_cluster_function= */ true); + } + } + + /// remote('addresses_expr', db_name.table_name, ...) + /// remote('addresses_expr', 'db_name', 'table_name', ...) + /// remote('addresses_expr', table_function(), ...) + /// cluster('cluster_name', db_name.table_name, ...) + /// cluster('cluster_name', 'db_name', 'table_name', ...) + /// cluster('cluster_name', table_function(), ...) + void visitRemoteFunction(const ASTFunction & function, bool is_cluster_function) + { + /// We consider dependencies on local tables only. + bool has_local_replicas = false; + + if (is_cluster_function) + { + if (auto cluster_name = tryGetClusterNameFromArgument(function, 0)) + { + if (auto cluster = context->tryGetCluster(*cluster_name)) + { + if (cluster->getLocalShardCount()) + has_local_replicas = true; + } + } + } + else + { + /// remote() and remoteSecure() are not fully supported. To properly support them we would need to check the first + /// argument to decide whether the host & port pattern specified in the first argument contains the local host or not + /// which is not trivial. For now we just always assume that the host & port pattern doesn't contain the local host. 
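+                /// (So, e.g., remote('127.0.0.1', mydb.src) currently records no dependency
+                /// on mydb.src, even though the address is in fact local.)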
+ } + + if (!function.arguments) + return; + + ASTs & args = function.arguments->children; + if (args.size() < 2) + return; + + const ASTFunction * table_function = nullptr; + if (const auto * second_arg_as_function = args[1]->as(); + second_arg_as_function && KnownTableFunctionNames::instance().exists(second_arg_as_function->name)) + { + table_function = second_arg_as_function; + } + + if (has_local_replicas && !table_function) + { + auto maybe_qualified_name = tryGetQualifiedNameFromArgument(function, 1, /* apply_current_database= */ false); + if (!maybe_qualified_name) + return; + auto & qualified_name = *maybe_qualified_name; + if (qualified_name.database.empty()) + { + auto table = tryGetStringFromArgument(function, 2); + if (!table) + return; + qualified_name.database = std::move(qualified_name.table); + qualified_name.table = std::move(table).value(); + } + dependencies.insert(qualified_name); + } + + if (!has_local_replicas && table_function) + { + /// `table function` will be executed remotely, so we won't check it or its arguments for dependencies. + skip_asts.emplace(table_function); + } } /// Gets an argument as a string, evaluates constants if necessary. @@ -279,6 +378,22 @@ namespace if (auto qualified_name = tryGetDatabaseAndTableNameFromArguments(function, database_arg_idx, table_arg_idx)) dependencies.emplace(std::move(qualified_name).value()); } + + std::optional tryGetClusterNameFromArgument(const ASTFunction & function, size_t arg_idx) const + { + if (!function.arguments) + return {}; + + ASTs & args = function.arguments->children; + if (arg_idx >= args.size()) + return {}; + + auto cluster_name = ::DB::tryGetClusterName(*args[arg_idx]); + if (cluster_name) + return cluster_name; + + return tryGetStringFromArgument(function, arg_idx); + } }; /// Visits ASTCreateQuery and extracts the names of all tables explicitly referenced in the create query. diff --git a/src/Interpreters/getClusterName.cpp b/src/Interpreters/getClusterName.cpp index d3c53b28cdf..dc3e9b41628 100644 --- a/src/Interpreters/getClusterName.cpp +++ b/src/Interpreters/getClusterName.cpp @@ -18,18 +18,31 @@ namespace ErrorCodes std::string getClusterName(const IAST & node) +{ + auto name = tryGetClusterName(node); + if (!name) + throw Exception("Illegal expression instead of cluster name.", ErrorCodes::BAD_ARGUMENTS); + return std::move(name).value(); +} + + +std::optional tryGetClusterName(const IAST & node) { if (const auto * ast_id = node.as()) return ast_id->name(); if (const auto * ast_lit = node.as()) - return checkAndGetLiteralArgument(*ast_lit, "cluster_name"); + { + if (ast_lit->value.getType() != Field::Types::String) + return {}; + return ast_lit->value.safeGet(); + } /// A hack to support hyphens in cluster names. if (const auto * ast_func = node.as()) { if (ast_func->name != "minus" || !ast_func->arguments || ast_func->arguments->children.size() < 2) - throw Exception("Illegal expression instead of cluster name.", ErrorCodes::BAD_ARGUMENTS); + return {}; String name; for (const auto & arg : ast_func->arguments->children) @@ -43,7 +56,7 @@ std::string getClusterName(const IAST & node) return name; } - throw Exception("Illegal expression instead of cluster name.", ErrorCodes::BAD_ARGUMENTS); + return {}; } diff --git a/src/Interpreters/getClusterName.h b/src/Interpreters/getClusterName.h index faf8975ede1..59952587a9a 100644 --- a/src/Interpreters/getClusterName.h +++ b/src/Interpreters/getClusterName.h @@ -15,6 +15,7 @@ namespace DB * Therefore, consider this case separately. 
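 * For example, the identifier `cluster-name` is parsed as minus(cluster, name),
 * and getClusterName() reassembles it into the string "cluster-name".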
*/ std::string getClusterName(const IAST & node); +std::optional tryGetClusterName(const IAST & node); std::string getClusterNameAndMakeLiteral(ASTPtr & node); diff --git a/tests/integration/test_backup_restore_on_cluster/test.py b/tests/integration/test_backup_restore_on_cluster/test.py index 09915c8e789..27448b95b51 100644 --- a/tests/integration/test_backup_restore_on_cluster/test.py +++ b/tests/integration/test_backup_restore_on_cluster/test.py @@ -796,6 +796,84 @@ def test_mutation(): node1.query(f"RESTORE TABLE tbl ON CLUSTER 'cluster' FROM {backup_name}") +def test_tables_dependency(): + node1.query("CREATE DATABASE mydb ON CLUSTER 'cluster3'") + + node1.query( + "CREATE TABLE mydb.src ON CLUSTER 'cluster' (x Int64, y String) ENGINE=MergeTree ORDER BY tuple()" + ) + + node1.query( + "CREATE DICTIONARY mydb.dict ON CLUSTER 'cluster' (x Int64, y String) PRIMARY KEY x " + "SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() DB 'mydb' TABLE 'src')) LAYOUT(FLAT()) LIFETIME(0)" + ) + + node1.query( + "CREATE TABLE mydb.dist1 (x Int64) ENGINE=Distributed('cluster', 'mydb', 'src')" + ) + + node3.query( + "CREATE TABLE mydb.dist2 (x Int64) ENGINE=Distributed(cluster, 'mydb', 'src')" + ) + + node1.query("CREATE TABLE mydb.clusterfunc1 AS cluster('cluster', 'mydb.src')") + node1.query("CREATE TABLE mydb.clusterfunc2 AS cluster(cluster, mydb.src)") + node1.query("CREATE TABLE mydb.clusterfunc3 AS cluster(cluster, 'mydb', 'src')") + node1.query( + "CREATE TABLE mydb.clusterfunc4 AS cluster(cluster, dictionary(mydb.dict))" + ) + node1.query( + "CREATE TABLE mydb.clusterfunc5 AS clusterAllReplicas(cluster, dictionary(mydb.dict))" + ) + + node3.query("CREATE TABLE mydb.clusterfunc6 AS cluster('cluster', 'mydb.src')") + node3.query("CREATE TABLE mydb.clusterfunc7 AS cluster(cluster, mydb.src)") + node3.query("CREATE TABLE mydb.clusterfunc8 AS cluster(cluster, 'mydb', 'src')") + node3.query( + "CREATE TABLE mydb.clusterfunc9 AS cluster(cluster, dictionary(mydb.dict))" + ) + node3.query( + "CREATE TABLE mydb.clusterfunc10 AS clusterAllReplicas(cluster, dictionary(mydb.dict))" + ) + + backup_name = new_backup_name() + node3.query(f"BACKUP DATABASE mydb ON CLUSTER 'cluster3' TO {backup_name}") + + node3.query("DROP DATABASE mydb") + + node3.query(f"RESTORE DATABASE mydb ON CLUSTER 'cluster3' FROM {backup_name}") + + node3.query("SYSTEM FLUSH LOGS ON CLUSTER 'cluster3'") + expect_in_logs_1 = [ + "Table mydb.src has no dependencies (level 0)", + "Table mydb.dict has 1 dependencies: mydb.src (level 1)", + "Table mydb.dist1 has 1 dependencies: mydb.src (level 1)", + "Table mydb.clusterfunc1 has 1 dependencies: mydb.src (level 1)", + "Table mydb.clusterfunc2 has 1 dependencies: mydb.src (level 1)", + "Table mydb.clusterfunc3 has 1 dependencies: mydb.src (level 1)", + "Table mydb.clusterfunc4 has 1 dependencies: mydb.dict (level 2)", + "Table mydb.clusterfunc5 has 1 dependencies: mydb.dict (level 2)", + ] + expect_in_logs_2 = [ + "Table mydb.src has no dependencies (level 0)", + "Table mydb.dict has 1 dependencies: mydb.src (level 1)", + ] + expect_in_logs_3 = [ + "Table mydb.dist2 has no dependencies (level 0)", + "Table mydb.clusterfunc6 has no dependencies (level 0)", + "Table mydb.clusterfunc7 has no dependencies (level 0)", + "Table mydb.clusterfunc8 has no dependencies (level 0)", + "Table mydb.clusterfunc9 has no dependencies (level 0)", + "Table mydb.clusterfunc10 has no dependencies (level 0)", + ] + for expect in expect_in_logs_1: + assert node1.contains_in_log(f"RestorerFromBackup: {expect}") + for 
expect in expect_in_logs_2: + assert node2.contains_in_log(f"RestorerFromBackup: {expect}") + for expect in expect_in_logs_3: + assert node3.contains_in_log(f"RestorerFromBackup: {expect}") + + def test_get_error_from_other_host(): node1.query("CREATE TABLE tbl (`x` UInt8) ENGINE = MergeTree ORDER BY x") node1.query("INSERT INTO tbl VALUES (3)") From 3c72ee79b9c56adc11f6a7c6036da56ae1c74b1f Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Mon, 12 Dec 2022 20:19:31 +0100 Subject: [PATCH 22/35] Fix test 02025_dictionary_view_different_db (dictionaries without host & port are considered now as loading dependencies too). --- .../queries/0_stateless/02025_dictionary_view_different_db.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02025_dictionary_view_different_db.sql b/tests/queries/0_stateless/02025_dictionary_view_different_db.sql index b06285b683f..f26a63bced4 100644 --- a/tests/queries/0_stateless/02025_dictionary_view_different_db.sql +++ b/tests/queries/0_stateless/02025_dictionary_view_different_db.sql @@ -34,8 +34,8 @@ DROP VIEW IF EXISTS test_view_different_db; CREATE VIEW test_view_different_db AS SELECT id, value, dictGet('2025_test_db.test_dictionary', 'value', id) FROM 2025_test_db.view_table; SELECT * FROM test_view_different_db; -DROP TABLE 2025_test_db.test_table; DROP DICTIONARY 2025_test_db.test_dictionary; +DROP TABLE 2025_test_db.test_table; DROP TABLE 2025_test_db.view_table; DROP VIEW test_view_different_db; From 7765137371aa7a59ebd653362378edfb19289af6 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Tue, 13 Dec 2022 13:41:02 +0000 Subject: [PATCH 23/35] Minor fixes (local clang-tidy warnings) --- src/Functions/FunctionUnixTimestamp64.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Functions/FunctionUnixTimestamp64.h b/src/Functions/FunctionUnixTimestamp64.h index d869ccccca8..847ea74a784 100644 --- a/src/Functions/FunctionUnixTimestamp64.h +++ b/src/Functions/FunctionUnixTimestamp64.h @@ -106,7 +106,7 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { - if (arguments.size() < 1 || arguments.size() > 2) + if (arguments.empty() || arguments.size() > 2) throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} takes one or two arguments", name); if (!isInteger(arguments[0].type)) @@ -126,7 +126,7 @@ public: const auto & col = *src.column; if (!checkAndGetColumn>(col)) - return 0; + return false; auto & result_data = result_column->getData(); @@ -135,7 +135,7 @@ public: for (size_t i = 0; i < input_rows_count; ++i) result_data[i] = source_data[i]; - return 1; + return true; } ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override From 2c57f2bd1f4ccbfa0553a87321c5fccdda96f957 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Tue, 13 Dec 2022 14:11:08 +0000 Subject: [PATCH 24/35] Remove wrong assert --- src/Storages/MergeTree/MergeTreeData.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 767ea348ed1..ed21bd00d3b 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -2607,7 +2607,6 @@ void MergeTreeData::checkAlterIsPossible(const AlterCommands & commands, Context else { const auto & nested = old_metadata.columns.getNested(command.column_name); - assert(!nested.empty()); for (const auto & nested_column : nested) 
dropped_columns.emplace(nested_column.name); } From 4d02b480dac9ce1c3323f3b6cc73017e3c506945 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Tue, 13 Dec 2022 14:25:20 +0000 Subject: [PATCH 25/35] fix flaky test --- tests/queries/0_stateless/02344_describe_cache.sql | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02344_describe_cache.sql b/tests/queries/0_stateless/02344_describe_cache.sql index 8b3831bcaa8..fef004cb27f 100644 --- a/tests/queries/0_stateless/02344_describe_cache.sql +++ b/tests/queries/0_stateless/02344_describe_cache.sql @@ -1,4 +1,7 @@ --- Tags: no-fasttest +-- Tags: no-fasttest, no-parallel + +SYSTEM DROP FILESYSTEM CACHE 's3_cache/'; +SYSTEM DROP FILESYSTEM CACHE 's3_cache_2/'; DESCRIBE FILESYSTEM CACHE 's3_cache'; DESCRIBE FILESYSTEM CACHE 's3_cache_2'; From 34a589a7d8907e1d23521f8d1fabfc8319db620b Mon Sep 17 00:00:00 2001 From: Han Fei Date: Wed, 14 Dec 2022 14:06:12 +0100 Subject: [PATCH 26/35] create async_blocks zk path for old replicated tables and add a flag "async_insert_deduplicate" --- src/Core/Settings.h | 1 + src/Storages/StorageReplicatedMergeTree.cpp | 6 ++++-- tests/queries/0_stateless/02481_async_insert_dedup.python | 2 +- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 5c57d2082f5..acf11ca3148 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -222,6 +222,7 @@ static constexpr UInt64 operator""_GiB(unsigned long long value) M(UInt64, max_concurrent_queries_for_user, 0, "The maximum number of concurrent requests per user.", 0) \ \ M(Bool, insert_deduplicate, true, "For INSERT queries in the replicated table, specifies that deduplication of inserted blocks should be performed", 0) \ + M(Bool, async_insert_deduplicate, false, "For async INSERT queries in the replicated table, specifies that deduplication of inserted blocks should be performed", 0) \ \ M(UInt64Auto, insert_quorum, 0, "For INSERT queries in the replicated table, wait writing for the specified number of replicas and linearize the addition of the data. 
0 - disabled, 'auto' - use majority", 0) \ M(Milliseconds, insert_quorum_timeout, 600000, "If the quorum of replicas did not meet in specified time (in milliseconds), exception will be thrown and insertion is aborted.", 0) \ diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index d8264eead54..566fcd783e6 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -633,6 +633,8 @@ void StorageReplicatedMergeTree::createNewZooKeeperNodes() futures.push_back(zookeeper->asyncTryCreateNoThrow(zookeeper_path + "/pinned_part_uuids", getPinnedPartUUIDs()->toString(), zkutil::CreateMode::Persistent)); /// For ALTER PARTITION with multi-leaders futures.push_back(zookeeper->asyncTryCreateNoThrow(zookeeper_path + "/alter_partition_version", String(), zkutil::CreateMode::Persistent)); + /// For deduplication of async inserts + futures.push_back(zookeeper->asyncTryCreateNoThrow(zookeeper_path + "/async_blocks", String(), zkutil::CreateMode::Persistent)); /// As for now, "/temp" node must exist, but we want to be able to remove it in future if (zookeeper->exists(zookeeper_path + "/temp")) @@ -4535,7 +4537,7 @@ SinkToStoragePtr StorageReplicatedMergeTree::write(const ASTPtr & /*query*/, con const auto storage_settings_ptr = getSettings(); const Settings & query_settings = local_context->getSettingsRef(); bool deduplicate = storage_settings_ptr->replicated_deduplication_window != 0 && query_settings.insert_deduplicate; - bool async_deduplicate = query_settings.async_insert && storage_settings_ptr->replicated_deduplication_window_for_async_inserts != 0 && query_settings.insert_deduplicate; + bool async_deduplicate = query_settings.async_insert && query_settings.async_insert_deduplicate && storage_settings_ptr->replicated_deduplication_window_for_async_inserts != 0 && query_settings.insert_deduplicate; if (async_deduplicate) return std::make_shared( *this, metadata_snapshot, query_settings.insert_quorum.valueOr(0), @@ -6562,7 +6564,7 @@ void StorageReplicatedMergeTree::getClearBlocksInPartitionOpsImpl( { Strings blocks; if (Coordination::Error::ZOK != zookeeper.tryGetChildren(fs::path(zookeeper_path) / blocks_dir_name, blocks)) - throw Exception(zookeeper_path + "/" + blocks_dir_name + "blocks doesn't exist", ErrorCodes::NOT_FOUND_NODE); + throw Exception(zookeeper_path + "/" + blocks_dir_name + "doesn't exist", ErrorCodes::NOT_FOUND_NODE); String partition_prefix = partition_id + "_"; Strings paths_to_get; diff --git a/tests/queries/0_stateless/02481_async_insert_dedup.python b/tests/queries/0_stateless/02481_async_insert_dedup.python index fac031434b4..404165941b9 100644 --- a/tests/queries/0_stateless/02481_async_insert_dedup.python +++ b/tests/queries/0_stateless/02481_async_insert_dedup.python @@ -68,7 +68,7 @@ def generate_data(q, total_number): def fetch_and_insert_data(q, client): while True: insert = q.get() - client.query(insert, settings = {"async_insert": 1, "wait_for_async_insert": 0, "async_insert_busy_timeout_ms": 1500, "insert_keeper_fault_injection_probability": 0}) + client.query(insert, settings = {"async_insert": 1, "async_insert_deduplicate": 1, "wait_for_async_insert": 0, "async_insert_busy_timeout_ms": 1500, "insert_keeper_fault_injection_probability": 0}) q.task_done() sleep_time = random.randint(50, 500) time.sleep(sleep_time/1000.0) From 623af800c26f1f45dcb972e969369f93d9051458 Mon Sep 17 00:00:00 2001 From: Han Fei Date: Wed, 14 Dec 2022 14:29:04 +0100 Subject: [PATCH 27/35] 
Update src/Storages/StorageReplicatedMergeTree.cpp Co-authored-by: Alexander Tokmakov --- src/Storages/StorageReplicatedMergeTree.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 566fcd783e6..99ceb1d90ae 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -6564,7 +6564,7 @@ void StorageReplicatedMergeTree::getClearBlocksInPartitionOpsImpl( { Strings blocks; if (Coordination::Error::ZOK != zookeeper.tryGetChildren(fs::path(zookeeper_path) / blocks_dir_name, blocks)) - throw Exception(zookeeper_path + "/" + blocks_dir_name + "doesn't exist", ErrorCodes::NOT_FOUND_NODE); + throw Exception(ErrorCodes::NOT_FOUND_NODE, "Node {}/{} doesn't exist", zookeeper_path, blocks_dir_name); String partition_prefix = partition_id + "_"; Strings paths_to_get; From 67fa1856111e0458ce863288dc1a5cdab95560ec Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Wed, 14 Dec 2022 17:17:19 +0300 Subject: [PATCH 28/35] Revert "Builtin skim" --- .gitignore | 5 - CMakeLists.txt | 2 - base/base/CMakeLists.txt | 5 - base/base/ReplxxLineReader.cpp | 130 +++- base/base/ReplxxLineReader.h | 12 + contrib/corrosion-cmake/CMakeLists.txt | 3 + docker/packager/binary/build.sh | 3 +- rust/.cargo/config.toml.in | 3 - rust/BLAKE3/CMakeLists.txt | 0 rust/CMakeLists.txt | 24 - rust/skim/.cargo/config.toml.in | 2 - rust/skim/.gitignore | 2 - rust/skim/CMakeLists.txt | 62 -- rust/skim/Cargo.lock | 982 ------------------------- rust/skim/Cargo.toml | 19 - rust/skim/build.rs.in | 8 - rust/skim/include/skim.h | 90 --- rust/skim/src/lib.rs | 49 -- src/Common/config.h.in | 1 - src/configure_config.cmake | 3 - 20 files changed, 119 insertions(+), 1286 deletions(-) delete mode 100644 rust/.cargo/config.toml.in mode change 100644 => 100755 rust/BLAKE3/CMakeLists.txt delete mode 100644 rust/skim/.cargo/config.toml.in delete mode 100644 rust/skim/.gitignore delete mode 100644 rust/skim/CMakeLists.txt delete mode 100644 rust/skim/Cargo.lock delete mode 100644 rust/skim/Cargo.toml delete mode 100644 rust/skim/build.rs.in delete mode 100644 rust/skim/include/skim.h delete mode 100644 rust/skim/src/lib.rs diff --git a/.gitignore b/.gitignore index 7d915186dcc..03bde052526 100644 --- a/.gitignore +++ b/.gitignore @@ -159,8 +159,3 @@ website/package-lock.json tests/queries/0_stateless/test_* tests/queries/0_stateless/*.binary tests/queries/0_stateless/*.generated-expect - -# rust -/rust/**/target -# It is autogenerated from *.in -/rust/**/.cargo/config.toml diff --git a/CMakeLists.txt b/CMakeLists.txt index 99997db96a1..bbad8b3b223 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -609,8 +609,6 @@ if (NATIVE_BUILD_TARGETS "-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}" "-DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}" "-DENABLE_CCACHE=${ENABLE_CCACHE}" - # Avoid overriding .cargo/config.toml with native toolchain. 
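-      # (configure_rustc() writes rust/.cargo/config.toml into the source tree, so a nested
-      # native configure run would overwrite the flags prepared for the cross build.)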
- "-DENABLE_RUST=OFF" "-DENABLE_CLICKHOUSE_SELF_EXTRACTING=${ENABLE_CLICKHOUSE_SELF_EXTRACTING}" ${CMAKE_SOURCE_DIR} WORKING_DIRECTORY "${NATIVE_BUILD_DIR}" diff --git a/base/base/CMakeLists.txt b/base/base/CMakeLists.txt index d788bd6f092..175a4836e64 100644 --- a/base/base/CMakeLists.txt +++ b/base/base/CMakeLists.txt @@ -40,11 +40,6 @@ else () target_compile_definitions(common PUBLIC WITH_COVERAGE=0) endif () -# FIXME: move libraries for line reading out from base -if (TARGET ch_rust::skim) - target_link_libraries(common PUBLIC ch_rust::skim) -endif() - target_include_directories(common PUBLIC .. "${CMAKE_CURRENT_BINARY_DIR}/..") if (OS_DARWIN AND NOT USE_STATIC_LIBRARIES) diff --git a/base/base/ReplxxLineReader.cpp b/base/base/ReplxxLineReader.cpp index ffa10615936..b86746365b7 100644 --- a/base/base/ReplxxLineReader.cpp +++ b/base/base/ReplxxLineReader.cpp @@ -16,11 +16,9 @@ #include #include #include -#include "config.h" // USE_SKIM - -#if USE_SKIM -#include -#endif +#include +#include +#include /// is_any_of namespace { @@ -41,6 +39,36 @@ std::string getEditor() return editor; } +std::pair getFuzzyFinder() +{ + const char * env_path = std::getenv("PATH"); // NOLINT(concurrency-mt-unsafe) + + if (!env_path || !*env_path) + return {}; + + std::vector paths; + boost::split(paths, env_path, boost::is_any_of(":")); + for (const auto & path_str : paths) + { + std::filesystem::path path(path_str); + std::filesystem::path sk_bin_path = path / "sk"; + if (!access(sk_bin_path.c_str(), X_OK)) + return {sk_bin_path, FUZZY_FINDER_SKIM}; + + std::filesystem::path fzf_bin_path = path / "fzf"; + if (!access(fzf_bin_path.c_str(), X_OK)) + return {fzf_bin_path, FUZZY_FINDER_FZF}; + } + + return {"", FUZZY_FINDER_NONE}; +} + +String escapeShellArgument(std::string arg) +{ + boost::replace_all(arg, "'", "'\\''"); + return fmt::format("'{}'", arg); +} + /// See comments in ShellCommand::executeImpl() /// (for the vfork via dlsym()) int executeCommand(char * const argv[]) @@ -288,6 +316,8 @@ ReplxxLineReader::ReplxxLineReader( using namespace std::placeholders; using Replxx = replxx::Replxx; + std::tie(fuzzy_finder, fuzzy_finder_type) = getFuzzyFinder(); + if (!history_file_path.empty()) { history_file_fd = open(history_file_path.c_str(), O_RDWR); @@ -392,30 +422,17 @@ ReplxxLineReader::ReplxxLineReader( }; rx.bind_key(Replxx::KEY::meta('#'), insert_comment_action); -#if USE_SKIM - auto interactive_history_search = [this](char32_t code) + /// interactive search in history (requires fzf/sk) + if (fuzzy_finder_type != FUZZY_FINDER_NONE) { - std::vector words; + auto interactive_history_search = [this](char32_t code) { - auto hs(rx.history_scan()); - while (hs.next()) - words.push_back(hs.get().text()); - } - - std::string new_query(skim(words)); - if (!new_query.empty()) - rx.set_state(replxx::Replxx::State(new_query.c_str(), static_cast(new_query.size()))); - - if (bracketed_paste_enabled) - enableBracketedPaste(); - - rx.invoke(Replxx::ACTION::CLEAR_SELF, code); - return rx.invoke(Replxx::ACTION::REPAINT, code); - }; - - /// NOTE: You can use Ctrl-S for non-fuzzy complete. 
- rx.bind_key(Replxx::KEY::control('R'), interactive_history_search); -#endif + openInteractiveHistorySearch(); + rx.invoke(Replxx::ACTION::CLEAR_SELF, code); + return rx.invoke(Replxx::ACTION::REPAINT, code); + }; + rx.bind_key(Replxx::KEY::control('R'), interactive_history_search); + } } ReplxxLineReader::~ReplxxLineReader() @@ -484,6 +501,65 @@ void ReplxxLineReader::openEditor() enableBracketedPaste(); } +void ReplxxLineReader::openInteractiveHistorySearch() +{ + assert(!fuzzy_finder.empty()); + TemporaryFile history_file("clickhouse_client_history_in_XXXXXX.bin"); + auto hs(rx.history_scan()); + while (hs.next()) + { + history_file.write(hs.get().text()); + history_file.write(std::string(1, '\0')); + } + history_file.close(); + + TemporaryFile output_file("clickhouse_client_history_out_XXXXXX.sql"); + output_file.close(); + + char sh[] = "sh"; + char sh_c[] = "-c"; + /// NOTE: You can use one of the following to configure the behaviour additionally: + /// - SKIM_DEFAULT_OPTIONS + /// - FZF_DEFAULT_OPTS + /// + /// And also note, that fzf and skim is 95% compatible (at least option + /// that is used here) + std::string fuzzy_finder_command = fmt::format("{} --read0 --height=30%", fuzzy_finder); + switch (fuzzy_finder_type) + { + case FUZZY_FINDER_SKIM: + fuzzy_finder_command += " --tac --tiebreak=-score"; + break; + case FUZZY_FINDER_FZF: + fuzzy_finder_command += " --tac --tiebreak=index"; + break; + case FUZZY_FINDER_NONE: + /// assertion for !fuzzy_finder.empty() is enough + break; + } + fuzzy_finder_command += fmt::format(" < {} > {}", + escapeShellArgument(history_file.getPath()), + escapeShellArgument(output_file.getPath())); + char * const argv[] = {sh, sh_c, fuzzy_finder_command.data(), nullptr}; + + try + { + if (executeCommand(argv) == 0) + { + std::string new_query = readFile(output_file.getPath()); + rightTrim(new_query); + rx.set_state(replxx::Replxx::State(new_query.c_str(), static_cast(new_query.size()))); + } + } + catch (const std::runtime_error & e) + { + rx.print(e.what()); + } + + if (bracketed_paste_enabled) + enableBracketedPaste(); +} + void ReplxxLineReader::enableBracketedPaste() { bracketed_paste_enabled = true; diff --git a/base/base/ReplxxLineReader.h b/base/base/ReplxxLineReader.h index 428fbf144c3..9be3b3aa993 100644 --- a/base/base/ReplxxLineReader.h +++ b/base/base/ReplxxLineReader.h @@ -4,6 +4,15 @@ #include +enum FuzzyFinderType +{ + FUZZY_FINDER_NONE, + /// Use https://github.com/junegunn/fzf + FUZZY_FINDER_FZF, + /// Use https://github.com/lotabout/skim + FUZZY_FINDER_SKIM, +}; + class ReplxxLineReader : public LineReader { public: @@ -26,6 +35,7 @@ private: void addToHistory(const String & line) override; int executeEditor(const std::string & path); void openEditor(); + void openInteractiveHistorySearch(); replxx::Replxx rx; replxx::Replxx::highlighter_callback_t highlighter; @@ -35,4 +45,6 @@ private: bool bracketed_paste_enabled = false; std::string editor; + std::string fuzzy_finder; + FuzzyFinderType fuzzy_finder_type = FUZZY_FINDER_NONE; }; diff --git a/contrib/corrosion-cmake/CMakeLists.txt b/contrib/corrosion-cmake/CMakeLists.txt index 682f1197afa..ef810182a40 100644 --- a/contrib/corrosion-cmake/CMakeLists.txt +++ b/contrib/corrosion-cmake/CMakeLists.txt @@ -10,6 +10,9 @@ else() endif() option(ENABLE_RUST "Enable rust" ${DEFAULT_ENABLE_RUST}) + +message(STATUS ${ENABLE_RUST}) + if(NOT ENABLE_RUST) message(STATUS "Not using rust") return() diff --git a/docker/packager/binary/build.sh b/docker/packager/binary/build.sh index 
436fcbe921c..c2de0e33d82 100755 --- a/docker/packager/binary/build.sh +++ b/docker/packager/binary/build.sh @@ -55,8 +55,7 @@ ccache --zero-stats ||: if [ "$BUILD_MUSL_KEEPER" == "1" ] then # build keeper with musl separately - # and without rust bindings - cmake --debug-trycompile -DENABLE_RUST=OFF -DBUILD_STANDALONE_KEEPER=1 -DENABLE_CLICKHOUSE_KEEPER=1 -DCMAKE_VERBOSE_MAKEFILE=1 -DUSE_MUSL=1 -LA -DCMAKE_TOOLCHAIN_FILE=/build/cmake/linux/toolchain-x86_64-musl.cmake "-DCMAKE_BUILD_TYPE=$BUILD_TYPE" "-DSANITIZE=$SANITIZER" -DENABLE_CHECK_HEAVY_BUILDS=1 "${CMAKE_FLAGS[@]}" .. + cmake --debug-trycompile -DBUILD_STANDALONE_KEEPER=1 -DENABLE_CLICKHOUSE_KEEPER=1 -DCMAKE_VERBOSE_MAKEFILE=1 -DUSE_MUSL=1 -LA -DCMAKE_TOOLCHAIN_FILE=/build/cmake/linux/toolchain-x86_64-musl.cmake "-DCMAKE_BUILD_TYPE=$BUILD_TYPE" "-DSANITIZE=$SANITIZER" -DENABLE_CHECK_HEAVY_BUILDS=1 "${CMAKE_FLAGS[@]}" .. # shellcheck disable=SC2086 # No quotes because I want it to expand to nothing if empty. ninja $NINJA_FLAGS clickhouse-keeper diff --git a/rust/.cargo/config.toml.in b/rust/.cargo/config.toml.in deleted file mode 100644 index a1dd966117b..00000000000 --- a/rust/.cargo/config.toml.in +++ /dev/null @@ -1,3 +0,0 @@ -[env] -CFLAGS = "@RUST_CFLAGS@" -CXXFLAGS = "@RUST_CXXFLAGS@" diff --git a/rust/BLAKE3/CMakeLists.txt b/rust/BLAKE3/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/rust/CMakeLists.txt b/rust/CMakeLists.txt index 002744949de..0d60ed66236 100644 --- a/rust/CMakeLists.txt +++ b/rust/CMakeLists.txt @@ -1,25 +1 @@ -function(configure_rustc) - # NOTE: this can also be done by overriding rustc, but it not trivial with rustup. - set(RUST_CFLAGS "${CMAKE_C_FLAGS}") - - set(CXX_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/llvm-project/libcxx/include") - set(RUST_CXXFLAGS "${CMAKE_CXX_FLAGS} -isystem ${CXX_INCLUDE_DIR} ") - - if (CMAKE_OSX_SYSROOT) - set(RUST_CXXFLAGS "${RUST_CXXFLAGS} -isysroot ${CMAKE_OSX_SYSROOT}") - set(RUST_CFLAGS "${RUST_CFLAGS} -isysroot ${CMAKE_OSX_SYSROOT}") - elseif(CMAKE_SYSROOT) - set(RUST_CXXFLAGS "${RUST_CXXFLAGS} --sysroot ${CMAKE_SYSROOT}") - set(RUST_CFLAGS "${RUST_CFLAGS} --sysroot ${CMAKE_SYSROOT}") - endif() - - message(STATUS "RUST_CFLAGS: ${RUST_CFLAGS}") - message(STATUS "RUST_CXXFLAGS: ${RUST_CXXFLAGS}") - - # NOTE: requires RW access for the source dir - configure_file("${CMAKE_CURRENT_SOURCE_DIR}/.cargo/config.toml.in" "${CMAKE_CURRENT_SOURCE_DIR}/.cargo/config.toml" @ONLY) -endfunction() -configure_rustc() - add_subdirectory (BLAKE3) -add_subdirectory (skim) diff --git a/rust/skim/.cargo/config.toml.in b/rust/skim/.cargo/config.toml.in deleted file mode 100644 index bcd4684f957..00000000000 --- a/rust/skim/.cargo/config.toml.in +++ /dev/null @@ -1,2 +0,0 @@ -[env] -CXXFLAGS = "@RUST_CXXFLAGS@" diff --git a/rust/skim/.gitignore b/rust/skim/.gitignore deleted file mode 100644 index 9ebbc84ba4a..00000000000 --- a/rust/skim/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -build.rs -.cargo/config.toml diff --git a/rust/skim/CMakeLists.txt b/rust/skim/CMakeLists.txt deleted file mode 100644 index e1b5faa12cb..00000000000 --- a/rust/skim/CMakeLists.txt +++ /dev/null @@ -1,62 +0,0 @@ -if (OS_FREEBSD) - # Right nix/libc requires fspacectl and it had been added only since FreeBSD14. - # And sicne sysroot has older libararies you will got undefined reference for clickhouse binary. 
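-    # (Here "nix" is the Rust libc-wrapper crate pulled in by skim's dependencies, not the package manager.)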
- # - # But likely everything should work without this syscall, however it is not - # possible right now to gently override libraries versions for depdendcies, - # and forking rust modules is a little bit too much for this thing. - # - # You can take a look at the details in the fillowing issue [1]. - # - # [1]: https://github.com/rust-lang/cargo/issues/5640 - # - message(STATUS "skim is disabled for FreeBSD") - return() -endif() - -corrosion_import_crate(MANIFEST_PATH Cargo.toml NO_STD) - -set(CXX_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/llvm-project/libcxx/include") -# -Wno-dollar-in-identifier-extension: cxx bridge complies names with '$' -# -Wno-unused-macros: unused CXXBRIDGE1_RUST_STRING -set(CXXBRIDGE_CXXFLAGS "-Wno-dollar-in-identifier-extension -Wno-unused-macros") -set(RUST_CXXFLAGS "${CMAKE_CXX_FLAGS} -isystem ${CXX_INCLUDE_DIR} ${CXXBRIDGE_CXXFLAGS}") -if (CMAKE_OSX_SYSROOT) - set(RUST_CXXFLAGS "${RUST_CXXFLAGS} -isysroot ${CMAKE_OSX_SYSROOT}") -elseif(CMAKE_SYSROOT) - set(RUST_CXXFLAGS "${RUST_CXXFLAGS} --sysroot ${CMAKE_SYSROOT}") -endif() -message(STATUS "RUST_CXXFLAGS (for skim): ${RUST_CXXFLAGS}") -# NOTE: requires RW access for the source dir -configure_file("${CMAKE_CURRENT_SOURCE_DIR}/build.rs.in" "${CMAKE_CURRENT_SOURCE_DIR}/build.rs" @ONLY) -configure_file("${CMAKE_CURRENT_SOURCE_DIR}/.cargo/config.toml.in" "${CMAKE_CURRENT_SOURCE_DIR}/.cargo/config.toml" @ONLY) - -set (ffi_binding_generated_path - ${CMAKE_BINARY_DIR}/${CMAKE_BUILD_TYPE}/cargo/build/${Rust_CARGO_TARGET_CACHED}/cxxbridge/_ch_rust_skim_rust/src/lib.rs.cc) -set (ffi_binding_final_path ${CMAKE_CURRENT_BINARY_DIR}/skim-ffi.cc) -message(STATUS "Writing FFI Binding for skim: ${ffi_binding_generated_path} => ${ffi_binding_final_path}") - -add_custom_command(OUTPUT ${ffi_binding_final_path} - COMMAND ${CMAKE_COMMAND} -E copy ${ffi_binding_generated_path} ${ffi_binding_final_path} - DEPENDS cargo-build__ch_rust_skim_rust) - -add_library(_ch_rust_skim_ffi ${ffi_binding_final_path}) -if (USE_STATIC_LIBRARIES OR NOT SPLIT_SHARED_LIBRARIES) - # static -else() - if (OS_DARWIN) - target_link_libraries(_ch_rust_skim_ffi PRIVATE -Wl,-undefined,dynamic_lookup) - else() - target_link_libraries(_ch_rust_skim_ffi PRIVATE -Wl,--unresolved-symbols=ignore-all) - endif() -endif() -# cxx bridge compiles such bindings -set_target_properties(_ch_rust_skim_ffi PROPERTIES COMPILE_FLAGS "${CXXBRIDGE_CXXFLAGS}") - -add_library(_ch_rust_skim INTERFACE) -target_include_directories(_ch_rust_skim INTERFACE include) -target_link_libraries(_ch_rust_skim INTERFACE - _ch_rust_skim_rust - _ch_rust_skim_ffi) - -add_library(ch_rust::skim ALIAS _ch_rust_skim) diff --git a/rust/skim/Cargo.lock b/rust/skim/Cargo.lock deleted file mode 100644 index f61e8a084e1..00000000000 --- a/rust/skim/Cargo.lock +++ /dev/null @@ -1,982 +0,0 @@ -# This file is automatically @generated by Cargo. -# It is not intended for manual editing. 
-version = 3 - -[[package]] -name = "_ch_rust_skim_rust" -version = "0.1.0" -dependencies = [ - "cxx", - "cxx-build", - "skim", -] - -[[package]] -name = "aho-corasick" -version = "0.7.20" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc936419f96fa211c1b9166887b38e5e40b19958e5b895be7c1f93adec7071ac" -dependencies = [ - "memchr", -] - -[[package]] -name = "android_system_properties" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" -dependencies = [ - "libc", -] - -[[package]] -name = "arrayvec" -version = "0.7.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8da52d66c7071e2e3fa2a1e5c6d088fec47b593032b254f5e980de8ea54454d6" - -[[package]] -name = "atty" -version = "0.2.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" -dependencies = [ - "hermit-abi", - "libc", - "winapi", -] - -[[package]] -name = "autocfg" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" - -[[package]] -name = "beef" -version = "0.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3a8241f3ebb85c056b509d4327ad0358fbbba6ffb340bf388f26350aeda225b1" - -[[package]] -name = "bitflags" -version = "1.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" - -[[package]] -name = "bumpalo" -version = "3.11.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "572f695136211188308f16ad2ca5c851a712c464060ae6974944458eb83880ba" - -[[package]] -name = "cc" -version = "1.0.77" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e9f73505338f7d905b19d18738976aae232eb46b8efc15554ffc56deb5d9ebe4" - -[[package]] -name = "cfg-if" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" - -[[package]] -name = "chrono" -version = "0.4.23" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "16b0a3d9ed01224b22057780a37bb8c5dbfe1be8ba48678e7bf57ec4b385411f" -dependencies = [ - "iana-time-zone", - "js-sys", - "num-integer", - "num-traits", - "time 0.1.45", - "wasm-bindgen", - "winapi", -] - -[[package]] -name = "clap" -version = "3.2.23" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "71655c45cb9845d3270c9d6df84ebe72b4dad3c2ba3f7023ad47c144e4e473a5" -dependencies = [ - "atty", - "bitflags", - "clap_lex", - "indexmap", - "once_cell", - "strsim", - "termcolor", - "textwrap", -] - -[[package]] -name = "clap_lex" -version = "0.2.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2850f2f5a82cbf437dd5af4d49848fbdfc27c157c3d010345776f952765261c5" -dependencies = [ - "os_str_bytes", -] - -[[package]] -name = "codespan-reporting" -version = "0.11.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3538270d33cc669650c4b093848450d380def10c331d38c768e34cac80576e6e" -dependencies = [ - "termcolor", - "unicode-width", -] - -[[package]] -name = "core-foundation-sys" -version = "0.8.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"5827cebf4670468b8772dd191856768aedcb1b0278a04f989f7766351917b9dc" - -[[package]] -name = "crossbeam" -version = "0.8.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2801af0d36612ae591caa9568261fddce32ce6e08a7275ea334a06a4ad021a2c" -dependencies = [ - "cfg-if", - "crossbeam-channel", - "crossbeam-deque", - "crossbeam-epoch", - "crossbeam-queue", - "crossbeam-utils", -] - -[[package]] -name = "crossbeam-channel" -version = "0.5.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c2dd04ddaf88237dc3b8d8f9a3c1004b506b54b3313403944054d23c0870c521" -dependencies = [ - "cfg-if", - "crossbeam-utils", -] - -[[package]] -name = "crossbeam-deque" -version = "0.8.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "715e8152b692bba2d374b53d4875445368fdf21a94751410af607a5ac677d1fc" -dependencies = [ - "cfg-if", - "crossbeam-epoch", - "crossbeam-utils", -] - -[[package]] -name = "crossbeam-epoch" -version = "0.9.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "01a9af1f4c2ef74bb8aa1f7e19706bc72d03598c8a570bb5de72243c7a9d9d5a" -dependencies = [ - "autocfg", - "cfg-if", - "crossbeam-utils", - "memoffset 0.7.1", - "scopeguard", -] - -[[package]] -name = "crossbeam-queue" -version = "0.3.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d1cfb3ea8a53f37c40dea2c7bedcbd88bdfae54f5e2175d6ecaff1c988353add" -dependencies = [ - "cfg-if", - "crossbeam-utils", -] - -[[package]] -name = "crossbeam-utils" -version = "0.8.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4fb766fa798726286dbbb842f174001dab8abc7b627a1dd86e0b7222a95d929f" -dependencies = [ - "cfg-if", -] - -[[package]] -name = "cxx" -version = "1.0.83" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bdf07d07d6531bfcdbe9b8b739b104610c6508dcc4d63b410585faf338241daf" -dependencies = [ - "cc", - "cxxbridge-flags", - "cxxbridge-macro", - "link-cplusplus", -] - -[[package]] -name = "cxx-build" -version = "1.0.83" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d2eb5b96ecdc99f72657332953d4d9c50135af1bac34277801cc3937906ebd39" -dependencies = [ - "cc", - "codespan-reporting", - "once_cell", - "proc-macro2", - "quote", - "scratch", - "syn", -] - -[[package]] -name = "cxxbridge-flags" -version = "1.0.83" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac040a39517fd1674e0f32177648334b0f4074625b5588a64519804ba0553b12" - -[[package]] -name = "cxxbridge-macro" -version = "1.0.83" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1362b0ddcfc4eb0a1f57b68bd77dd99f0e826958a96abd0ae9bd092e114ffed6" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "darling" -version = "0.14.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b0dd3cd20dc6b5a876612a6e5accfe7f3dd883db6d07acfbf14c128f61550dfa" -dependencies = [ - "darling_core", - "darling_macro", -] - -[[package]] -name = "darling_core" -version = "0.14.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a784d2ccaf7c98501746bf0be29b2022ba41fd62a2e622af997a03e9f972859f" -dependencies = [ - "fnv", - "ident_case", - "proc-macro2", - "quote", - "strsim", - "syn", -] - -[[package]] -name = "darling_macro" -version = "0.14.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"7618812407e9402654622dd402b0a89dff9ba93badd6540781526117b92aab7e" -dependencies = [ - "darling_core", - "quote", - "syn", -] - -[[package]] -name = "defer-drop" -version = "1.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f613ec9fa66a6b28cdb1842b27f9adf24f39f9afc4dcdd9fdecee4aca7945c57" -dependencies = [ - "crossbeam-channel", - "once_cell", -] - -[[package]] -name = "derive_builder" -version = "0.11.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d07adf7be193b71cc36b193d0f5fe60b918a3a9db4dad0449f57bcfd519704a3" -dependencies = [ - "derive_builder_macro", -] - -[[package]] -name = "derive_builder_core" -version = "0.11.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1f91d4cfa921f1c05904dc3c57b4a32c38aed3340cce209f3a6fd1478babafc4" -dependencies = [ - "darling", - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "derive_builder_macro" -version = "0.11.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f0314b72bed045f3a68671b3c86328386762c93f82d98c65c3cb5e5f573dd68" -dependencies = [ - "derive_builder_core", - "syn", -] - -[[package]] -name = "dirs-next" -version = "2.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b98cf8ebf19c3d1b223e151f99a4f9f0690dca41414773390fc824184ac833e1" -dependencies = [ - "cfg-if", - "dirs-sys-next", -] - -[[package]] -name = "dirs-sys-next" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4ebda144c4fe02d1f7ea1a7d9641b6fc6b580adcfa024ae48797ecdeb6825b4d" -dependencies = [ - "libc", - "redox_users", - "winapi", -] - -[[package]] -name = "either" -version = "1.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "90e5c1c8368803113bf0c9584fc495a58b86dc8a29edbf8fe877d21d9507e797" - -[[package]] -name = "env_logger" -version = "0.9.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a12e6657c4c97ebab115a42dcee77225f7f482cdd841cf7088c657a42e9e00e7" -dependencies = [ - "atty", - "humantime", - "log", - "regex", - "termcolor", -] - -[[package]] -name = "fnv" -version = "1.0.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" - -[[package]] -name = "fuzzy-matcher" -version = "0.3.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "54614a3312934d066701a80f20f15fa3b56d67ac7722b39eea5b4c9dd1d66c94" -dependencies = [ - "thread_local", -] - -[[package]] -name = "getrandom" -version = "0.2.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c05aeb6a22b8f62540c194aac980f2115af067bfe15a0734d7277a768d396b31" -dependencies = [ - "cfg-if", - "libc", - "wasi 0.11.0+wasi-snapshot-preview1", -] - -[[package]] -name = "hashbrown" -version = "0.12.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" - -[[package]] -name = "hermit-abi" -version = "0.1.19" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" -dependencies = [ - "libc", -] - -[[package]] -name = "humantime" -version = "2.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" - -[[package]] -name = 
"iana-time-zone" -version = "0.1.53" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "64c122667b287044802d6ce17ee2ddf13207ed924c712de9a66a5814d5b64765" -dependencies = [ - "android_system_properties", - "core-foundation-sys", - "iana-time-zone-haiku", - "js-sys", - "wasm-bindgen", - "winapi", -] - -[[package]] -name = "iana-time-zone-haiku" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0703ae284fc167426161c2e3f1da3ea71d94b21bedbcc9494e92b28e334e3dca" -dependencies = [ - "cxx", - "cxx-build", -] - -[[package]] -name = "ident_case" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" - -[[package]] -name = "indexmap" -version = "1.9.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1885e79c1fc4b10f0e172c475f458b7f7b93061064d98c3293e98c5ba0c8b399" -dependencies = [ - "autocfg", - "hashbrown", -] - -[[package]] -name = "js-sys" -version = "0.3.60" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49409df3e3bf0856b916e2ceaca09ee28e6871cf7d9ce97a692cacfdb2a25a47" -dependencies = [ - "wasm-bindgen", -] - -[[package]] -name = "lazy_static" -version = "1.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" - -[[package]] -name = "libc" -version = "0.2.138" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "db6d7e329c562c5dfab7a46a2afabc8b987ab9a4834c9d1ca04dc54c1546cef8" - -[[package]] -name = "link-cplusplus" -version = "1.0.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9272ab7b96c9046fbc5bc56c06c117cb639fe2d509df0c421cad82d2915cf369" -dependencies = [ - "cc", -] - -[[package]] -name = "log" -version = "0.4.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "abb12e687cfb44aa40f41fc3978ef76448f9b6038cad6aef4259d3c095a2382e" -dependencies = [ - "cfg-if", -] - -[[package]] -name = "memchr" -version = "2.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" - -[[package]] -name = "memoffset" -version = "0.6.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5aa361d4faea93603064a027415f07bd8e1d5c88c9fbf68bf56a285428fd79ce" -dependencies = [ - "autocfg", -] - -[[package]] -name = "memoffset" -version = "0.7.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5de893c32cde5f383baa4c04c5d6dbdd735cfd4a794b0debdb2bb1b421da5ff4" -dependencies = [ - "autocfg", -] - -[[package]] -name = "nix" -version = "0.24.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa52e972a9a719cecb6864fb88568781eb706bac2cd1d4f04a648542dbf78069" -dependencies = [ - "bitflags", - "cfg-if", - "libc", -] - -[[package]] -name = "nix" -version = "0.25.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f346ff70e7dbfd675fe90590b92d59ef2de15a8779ae305ebcbfd3f0caf59be4" -dependencies = [ - "autocfg", - "bitflags", - "cfg-if", - "libc", - "memoffset 0.6.5", - "pin-utils", -] - -[[package]] -name = "num-integer" -version = "0.1.45" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "225d3389fb3509a24c93f5c29eb6bde2586b98d9f016636dff58d7c6f7569cd9" -dependencies = [ 
- "autocfg", - "num-traits", -] - -[[package]] -name = "num-traits" -version = "0.2.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "578ede34cf02f8924ab9447f50c28075b4d3e5b269972345e7e0372b38c6cdcd" -dependencies = [ - "autocfg", -] - -[[package]] -name = "num_cpus" -version = "1.14.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f6058e64324c71e02bc2b150e4f3bc8286db6c83092132ffa3f6b1eab0f9def5" -dependencies = [ - "hermit-abi", - "libc", -] - -[[package]] -name = "once_cell" -version = "1.16.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86f0b0d4bf799edbc74508c1e8bf170ff5f41238e5f8225603ca7caaae2b7860" - -[[package]] -name = "os_str_bytes" -version = "6.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b7820b9daea5457c9f21c69448905d723fbd21136ccf521748f23fd49e723ee" - -[[package]] -name = "pin-utils" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" - -[[package]] -name = "proc-macro2" -version = "1.0.47" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ea3d908b0e36316caf9e9e2c4625cdde190a7e6f440d794667ed17a1855e725" -dependencies = [ - "unicode-ident", -] - -[[package]] -name = "quote" -version = "1.0.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bbe448f377a7d6961e30f5955f9b8d106c3f5e449d493ee1b125c1d43c2b5179" -dependencies = [ - "proc-macro2", -] - -[[package]] -name = "rayon" -version = "1.6.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6db3a213adf02b3bcfd2d3846bb41cb22857d131789e01df434fb7e7bc0759b7" -dependencies = [ - "either", - "rayon-core", -] - -[[package]] -name = "rayon-core" -version = "1.10.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cac410af5d00ab6884528b4ab69d1e8e146e8d471201800fa1b4524126de6ad3" -dependencies = [ - "crossbeam-channel", - "crossbeam-deque", - "crossbeam-utils", - "num_cpus", -] - -[[package]] -name = "redox_syscall" -version = "0.2.16" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fb5a58c1855b4b6819d59012155603f0b22ad30cad752600aadfcb695265519a" -dependencies = [ - "bitflags", -] - -[[package]] -name = "redox_users" -version = "0.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b033d837a7cf162d7993aded9304e30a83213c648b6e389db233191f891e5c2b" -dependencies = [ - "getrandom", - "redox_syscall", - "thiserror", -] - -[[package]] -name = "regex" -version = "1.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e076559ef8e241f2ae3479e36f97bd5741c0330689e217ad51ce2c76808b868a" -dependencies = [ - "aho-corasick", - "memchr", - "regex-syntax", -] - -[[package]] -name = "regex-syntax" -version = "0.6.28" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "456c603be3e8d448b072f410900c09faf164fbce2d480456f50eea6e25f9c848" - -[[package]] -name = "rustversion" -version = "1.0.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97477e48b4cf8603ad5f7aaf897467cf42ab4218a38ef76fb14c2d6773a6d6a8" - -[[package]] -name = "scopeguard" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" - -[[package]] -name = "scratch" -version 
= "1.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c8132065adcfd6e02db789d9285a0deb2f3fcb04002865ab67d5fb103533898" - -[[package]] -name = "serde" -version = "1.0.149" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "256b9932320c590e707b94576e3cc1f7c9024d0ee6612dfbcf1cb106cbe8e055" - -[[package]] -name = "shlex" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43b2853a4d09f215c24cc5489c992ce46052d359b5109343cbafbf26bc62f8a3" - -[[package]] -name = "skim" -version = "0.10.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cebed5f897cd6c0d80fbe30adb36c0abf7400e93043a63ae56458495642b3485" -dependencies = [ - "atty", - "beef", - "bitflags", - "chrono", - "clap", - "crossbeam", - "defer-drop", - "derive_builder", - "env_logger", - "fuzzy-matcher", - "lazy_static", - "log", - "nix 0.25.1", - "rayon", - "regex", - "shlex", - "time 0.3.17", - "timer", - "tuikit", - "unicode-width", - "vte", -] - -[[package]] -name = "strsim" -version = "0.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" - -[[package]] -name = "syn" -version = "1.0.105" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "60b9b43d45702de4c839cb9b51d9f529c5dd26a4aff255b42b1ebc03e88ee908" -dependencies = [ - "proc-macro2", - "quote", - "unicode-ident", -] - -[[package]] -name = "term" -version = "0.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c59df8ac95d96ff9bede18eb7300b0fda5e5d8d90960e76f8e14ae765eedbf1f" -dependencies = [ - "dirs-next", - "rustversion", - "winapi", -] - -[[package]] -name = "termcolor" -version = "1.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bab24d30b911b2376f3a13cc2cd443142f0c81dda04c118693e35b3835757755" -dependencies = [ - "winapi-util", -] - -[[package]] -name = "textwrap" -version = "0.16.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "222a222a5bfe1bba4a77b45ec488a741b3cb8872e5e499451fd7d0129c9c7c3d" - -[[package]] -name = "thiserror" -version = "1.0.37" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "10deb33631e3c9018b9baf9dcbbc4f737320d2b576bac10f6aefa048fa407e3e" -dependencies = [ - "thiserror-impl", -] - -[[package]] -name = "thiserror-impl" -version = "1.0.37" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "982d17546b47146b28f7c22e3d08465f6b8903d0ea13c1660d9d84a6e7adcdbb" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "thread_local" -version = "1.1.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5516c27b78311c50bf42c071425c560ac799b11c30b31f87e3081965fe5e0180" -dependencies = [ - "once_cell", -] - -[[package]] -name = "time" -version = "0.1.45" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b797afad3f312d1c66a56d11d0316f916356d11bd158fbc6ca6389ff6bf805a" -dependencies = [ - "libc", - "wasi 0.10.0+wasi-snapshot-preview1", - "winapi", -] - -[[package]] -name = "time" -version = "0.3.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a561bf4617eebd33bca6434b988f39ed798e527f51a1e797d0ee4f61c0a38376" -dependencies = [ - "serde", - "time-core", -] - -[[package]] -name = "time-core" -version = "0.1.0" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "2e153e1f1acaef8acc537e68b44906d2db6436e2b35ac2c6b42640fff91f00fd" - -[[package]] -name = "timer" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "31d42176308937165701f50638db1c31586f183f1aab416268216577aec7306b" -dependencies = [ - "chrono", -] - -[[package]] -name = "tuikit" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e19c6ab038babee3d50c8c12ff8b910bdb2196f62278776422f50390d8e53d8" -dependencies = [ - "bitflags", - "lazy_static", - "log", - "nix 0.24.3", - "term", - "unicode-width", -] - -[[package]] -name = "unicode-ident" -version = "1.0.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ceab39d59e4c9499d4e5a8ee0e2735b891bb7308ac83dfb4e80cad195c9f6f3" - -[[package]] -name = "unicode-width" -version = "0.1.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c0edd1e5b14653f783770bce4a4dabb4a5108a5370a5f5d8cfe8710c361f6c8b" - -[[package]] -name = "utf8parse" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "936e4b492acfd135421d8dca4b1aa80a7bfc26e702ef3af710e0752684df5372" - -[[package]] -name = "vte" -version = "0.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1aae21c12ad2ec2d168c236f369c38ff332bc1134f7246350dca641437365045" -dependencies = [ - "arrayvec", - "utf8parse", - "vte_generate_state_changes", -] - -[[package]] -name = "vte_generate_state_changes" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d257817081c7dffcdbab24b9e62d2def62e2ff7d00b1c20062551e6cccc145ff" -dependencies = [ - "proc-macro2", - "quote", -] - -[[package]] -name = "wasi" -version = "0.10.0+wasi-snapshot-preview1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a143597ca7c7793eff794def352d41792a93c481eb1042423ff7ff72ba2c31f" - -[[package]] -name = "wasi" -version = "0.11.0+wasi-snapshot-preview1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" - -[[package]] -name = "wasm-bindgen" -version = "0.2.83" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eaf9f5aceeec8be17c128b2e93e031fb8a4d469bb9c4ae2d7dc1888b26887268" -dependencies = [ - "cfg-if", - "wasm-bindgen-macro", -] - -[[package]] -name = "wasm-bindgen-backend" -version = "0.2.83" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c8ffb332579b0557b52d268b91feab8df3615f265d5270fec2a8c95b17c1142" -dependencies = [ - "bumpalo", - "log", - "once_cell", - "proc-macro2", - "quote", - "syn", - "wasm-bindgen-shared", -] - -[[package]] -name = "wasm-bindgen-macro" -version = "0.2.83" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "052be0f94026e6cbc75cdefc9bae13fd6052cdcaf532fa6c45e7ae33a1e6c810" -dependencies = [ - "quote", - "wasm-bindgen-macro-support", -] - -[[package]] -name = "wasm-bindgen-macro-support" -version = "0.2.83" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "07bc0c051dc5f23e307b13285f9d75df86bfdf816c5721e573dec1f9b8aa193c" -dependencies = [ - "proc-macro2", - "quote", - "syn", - "wasm-bindgen-backend", - "wasm-bindgen-shared", -] - -[[package]] -name = "wasm-bindgen-shared" -version = "0.2.83" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "1c38c045535d93ec4f0b4defec448e4291638ee608530863b1e2ba115d4fff7f" - -[[package]] -name = "winapi" -version = "0.3.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" -dependencies = [ - "winapi-i686-pc-windows-gnu", - "winapi-x86_64-pc-windows-gnu", -] - -[[package]] -name = "winapi-i686-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" - -[[package]] -name = "winapi-util" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178" -dependencies = [ - "winapi", -] - -[[package]] -name = "winapi-x86_64-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" diff --git a/rust/skim/Cargo.toml b/rust/skim/Cargo.toml deleted file mode 100644 index f665f249d7c..00000000000 --- a/rust/skim/Cargo.toml +++ /dev/null @@ -1,19 +0,0 @@ -[package] -name = "_ch_rust_skim_rust" -version = "0.1.0" -edition = "2021" - -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html - -[dependencies] -skim = "0.10.2" -cxx = "1.0.83" - -[build-dependencies] -cxx-build = "1.0.83" - -[lib] -crate-type = ["staticlib"] - -[profile.release] -debug = true diff --git a/rust/skim/build.rs.in b/rust/skim/build.rs.in deleted file mode 100644 index 0135c93222f..00000000000 --- a/rust/skim/build.rs.in +++ /dev/null @@ -1,8 +0,0 @@ -fn main() { - let mut build = cxx_build::bridge("src/lib.rs"); - for flag in "@RUST_CXXFLAGS@".split(' ') { - build.flag(flag); - } - build.compile("skim"); - println!("cargo:rerun-if-changed=src/lib.rs"); -} diff --git a/rust/skim/include/skim.h b/rust/skim/include/skim.h deleted file mode 100644 index 6a2d5806e3e..00000000000 --- a/rust/skim/include/skim.h +++ /dev/null @@ -1,90 +0,0 @@ -/// This header was compiled with: -/// -/// $ cxxbridge rust/skim/src/lib.rs --header -/// -/// For more info [1]. 
-/// -/// [1]: https://cxx.rs/build/other.html - -#pragma once -#include -#include -#include -#include - -namespace rust { -inline namespace cxxbridge1 { -// #include "rust/cxx.h" - -struct unsafe_bitcopy_t; - -#ifndef CXXBRIDGE1_RUST_STRING -#define CXXBRIDGE1_RUST_STRING -class String final { -public: - String() noexcept; - String(const String &) noexcept; - String(String &&) noexcept; - ~String() noexcept; - - String(const std::string &); - String(const char *); - String(const char *, std::size_t); - String(const char16_t *); - String(const char16_t *, std::size_t); - - static String lossy(const std::string &) noexcept; - static String lossy(const char *) noexcept; - static String lossy(const char *, std::size_t) noexcept; - static String lossy(const char16_t *) noexcept; - static String lossy(const char16_t *, std::size_t) noexcept; - - String &operator=(const String &) &noexcept; - String &operator=(String &&) &noexcept; - - explicit operator std::string() const; - - const char *data() const noexcept; - std::size_t size() const noexcept; - std::size_t length() const noexcept; - bool empty() const noexcept; - - const char *c_str() noexcept; - - std::size_t capacity() const noexcept; - void reserve(size_t new_cap) noexcept; - - using iterator = char *; - iterator begin() noexcept; - iterator end() noexcept; - - using const_iterator = const char *; - const_iterator begin() const noexcept; - const_iterator end() const noexcept; - const_iterator cbegin() const noexcept; - const_iterator cend() const noexcept; - - bool operator==(const String &) const noexcept; - bool operator!=(const String &) const noexcept; - bool operator<(const String &) const noexcept; - bool operator<=(const String &) const noexcept; - bool operator>(const String &) const noexcept; - bool operator>=(const String &) const noexcept; - - void swap(String &) noexcept; - - String(unsafe_bitcopy_t, const String &) noexcept; - -private: - struct lossy_t; - String(lossy_t, const char *, std::size_t) noexcept; - String(lossy_t, const char16_t *, std::size_t) noexcept; - friend void swap(String &lhs, String &rhs) noexcept { lhs.swap(rhs); } - - std::array repr; -}; -#endif // CXXBRIDGE1_RUST_STRING -} // namespace cxxbridge1 -} // namespace rust - -::rust::String skim(::std::vector<::std::string> const &words) noexcept; diff --git a/rust/skim/src/lib.rs b/rust/skim/src/lib.rs deleted file mode 100644 index 520aaae34c8..00000000000 --- a/rust/skim/src/lib.rs +++ /dev/null @@ -1,49 +0,0 @@ -use skim::prelude::*; -use cxx::{CxxString, CxxVector}; - -#[cxx::bridge] -mod ffi { - extern "Rust" { - fn skim(words: &CxxVector) -> String; - } -} - -struct Item { - text: String, -} -impl SkimItem for Item { - fn text(&self) -> Cow { - return Cow::Borrowed(&self.text); - } -} - -fn skim(words: &CxxVector) -> String { - // TODO: configure colors - let options = SkimOptionsBuilder::default() - .height(Some("30%")) - .tac(true) - .tiebreak(Some("-score".to_string())) - .build() - .unwrap(); - - let (tx, rx): (SkimItemSender, SkimItemReceiver) = unbounded(); - for word in words { - tx.send(Arc::new(Item{ text: word.to_string() })).unwrap(); - } - // so that skim could know when to stop waiting for more items. 
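-    // (Dropping the only sender disconnects the crossbeam channel, so once the queued
-    // items are drained the receiver stops blocking instead of waiting forever.)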
- drop(tx); - - let output = Skim::run_with(&options, Some(rx)); - if output.is_none() { - return "".to_string(); - } - let output = output.unwrap(); - if output.is_abort { - return "".to_string(); - } - - if output.selected_items.is_empty() { - return "".to_string(); - } - return output.selected_items[0].output().to_string(); -} diff --git a/src/Common/config.h.in b/src/Common/config.h.in index baa480a6545..80fda0781ee 100644 --- a/src/Common/config.h.in +++ b/src/Common/config.h.in @@ -52,5 +52,4 @@ #cmakedefine01 USE_ODBC #cmakedefine01 USE_BORINGSSL #cmakedefine01 USE_BLAKE3 -#cmakedefine01 USE_SKIM #cmakedefine01 USE_OPENSSL_INTREE diff --git a/src/configure_config.cmake b/src/configure_config.cmake index 58cb34b7d67..d7cdb769525 100644 --- a/src/configure_config.cmake +++ b/src/configure_config.cmake @@ -22,9 +22,6 @@ endif() if (TARGET ch_rust::blake3) set(USE_BLAKE3 1) endif() -if (TARGET ch_rust::skim) - set(USE_SKIM 1) -endif() if (TARGET OpenSSL::SSL) set(USE_SSL 1) endif() From 02d3b1a25676f43e76372fce535a1bffad978483 Mon Sep 17 00:00:00 2001 From: DanRoscigno Date: Wed, 14 Dec 2022 09:28:31 -0500 Subject: [PATCH 29/35] edits --- CHANGELOG.md | 90 ++++++++++++++++++++++++++-------------------------- 1 file changed, 45 insertions(+), 45 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1c49207685e..1393eb7b1d0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,13 +18,13 @@ ### ClickHouse release 22.12, 2022-12-15 #### Upgrade Notes -* Fixed backward incompatibility in (de)serialization of states of `min`, `max`, `any*`, `argMin`, `argMax` aggregate functions with `String` argument. The incompatibility affects 22.9, 22.10 and 22.11 branches (fixed since 22.9.6, 22.10.4 and 22.11.2 correspondingly). Some minor releases of 22.3, 22.7 and 22.8 branches are also affected: 22.3.13...22.3.14 (fixed since 22.3.15), 22.8.6...22.8.9 (fixed since 22.8.10), 22.7.6 and newer (will not be fixed in 22.7, we recommend to upgrade from 22.7.* to 22.8.10 or newer). This release note does not concern users that have never used affected versions. Incompatible versions append extra `'\0'` to strings when reading states of the aggregate functions mentioned above. For example, if an older version saved state of `anyState('foobar')` to `state_column` then incompatible version will print `'foobar\0'` on `anyMerge(state_column)`. Also incompatible versions write states of the aggregate functions without trailing `'\0'`. Newer versions (that have the fix) can correctly read data written by all versions including incompatible versions, except one corner case. If an incompatible version saved a state with a string that actually ends with null character, then newer version will trim trailing `'\0'` when reading state of affected aggregate function. For example, if an incompatible version saved state of `anyState('abrac\0dabra\0')` to `state_column` then newer versions will print `'abrac\0dabra'` on `anyMerge(state_column)`. The issue also affects distributed queries when an incompatible version works in a cluster together with older or newer versions. [#43038](https://github.com/ClickHouse/ClickHouse/pull/43038) ([Alexander Tokmakov](https://github.com/tavplubix), [Raúl Marín](https://github.com/Algunenano)). Note: all the official ClickHouse builds already include the patches. This is not necessarily true for unofficial third-party builds that should be avoided. 
+* Fixed backward incompatibility in (de)serialization of states of `min`, `max`, `any*`, `argMin`, `argMax` aggregate functions with `String` argument. The incompatibility affects 22.9, 22.10 and 22.11 branches (fixed since 22.9.6, 22.10.4 and 22.11.2 correspondingly). Some minor releases of 22.3, 22.7 and 22.8 branches are also affected: 22.3.13...22.3.14 (fixed since 22.3.15), 22.8.6...22.8.9 (fixed since 22.8.10), 22.7.6 and newer (will not be fixed in 22.7, we recommend upgrading from 22.7.* to 22.8.10 or newer). This release note does not concern users that have never used affected versions. Incompatible versions append an extra `'\0'` to strings when reading states of the aggregate functions mentioned above. For example, if an older version saved state of `anyState('foobar')` to `state_column` then the incompatible version will print `'foobar\0'` on `anyMerge(state_column)`. Also incompatible versions write states of the aggregate functions without trailing `'\0'`. Newer versions (that have the fix) can correctly read data written by all versions including incompatible versions, except one corner case. If an incompatible version saved a state with a string that actually ends with null character, then newer version will trim trailing `'\0'` when reading state of affected aggregate function. For example, if an incompatible version saved state of `anyState('abrac\0dabra\0')` to `state_column` then newer versions will print `'abrac\0dabra'` on `anyMerge(state_column)`. The issue also affects distributed queries when an incompatible version works in a cluster together with older or newer versions. [#43038](https://github.com/ClickHouse/ClickHouse/pull/43038) ([Alexander Tokmakov](https://github.com/tavplubix), [Raúl Marín](https://github.com/Algunenano)). Note: all the official ClickHouse builds already include the patches. This is not necessarily true for unofficial third-party builds that should be avoided. #### New Feature -* Add `BSONEachRow` input/output format. In this format, ClickHouse formats/parses each row as a separate BSON document and each column is formatted/parsed as a single BSON field with column name as a key. [#42033](https://github.com/ClickHouse/ClickHouse/pull/42033) ([mark-polokhov](https://github.com/mark-polokhov)). +* Add `BSONEachRow` input/output format. In this format, ClickHouse formats/parses each row as a separate BSON document and each column is formatted/parsed as a single BSON field with the column name as the key. [#42033](https://github.com/ClickHouse/ClickHouse/pull/42033) ([mark-polokhov](https://github.com/mark-polokhov)). * Add `grace_hash` JOIN algorithm, it can be enabled with `SET join_algorithm = 'grace_hash'`. [#38191](https://github.com/ClickHouse/ClickHouse/pull/38191) ([BigRedEye](https://github.com/BigRedEye), [Vladimir C](https://github.com/vdimir)). * Allow configuring password complexity rules and checks for creating and changing users. [#43719](https://github.com/ClickHouse/ClickHouse/pull/43719) ([Nikolay Degterinsky](https://github.com/evillique)). -* Add `CREATE / ALTER / DROP NAMED COLLECTION` queries. [#43252](https://github.com/ClickHouse/ClickHouse/pull/43252) ([Kseniia Sumarokova](https://github.com/kssenii)). Restrict default access to named collections for user defined in config. It must have explicit `show_named_collections = 1` to be able to see them. [#43325](https://github.com/ClickHouse/ClickHouse/pull/43325) ([Kseniia Sumarokova](https://github.com/kssenii)). 
The `system.named_collections` table is introduced [#43147](https://github.com/ClickHouse/ClickHouse/pull/43147) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Add `CREATE / ALTER / DROP NAMED COLLECTION` queries. [#43252](https://github.com/ClickHouse/ClickHouse/pull/43252) ([Kseniia Sumarokova](https://github.com/kssenii)). Restrict default access to named collections to the user defined in config. This requires that `show_named_collections = 1` is set to be able to see them. [#43325](https://github.com/ClickHouse/ClickHouse/pull/43325) ([Kseniia Sumarokova](https://github.com/kssenii)). The `system.named_collections` table is introduced [#43147](https://github.com/ClickHouse/ClickHouse/pull/43147) ([Kseniia Sumarokova](https://github.com/kssenii)). * Mask sensitive information in logs; mask secret parts in the output of queries `SHOW CREATE TABLE` and `SELECT FROM system.tables`. Also resolves [#41418](https://github.com/ClickHouse/ClickHouse/issues/41418). [#43227](https://github.com/ClickHouse/ClickHouse/pull/43227) ([Vitaly Baranov](https://github.com/vitlibar)). * Add `GROUP BY ALL` syntax: [#37631](https://github.com/ClickHouse/ClickHouse/issues/37631). [#42265](https://github.com/ClickHouse/ClickHouse/pull/42265) ([刘陶峰](https://github.com/taofengliu)). * Add `FROM table SELECT column` syntax. [#41095](https://github.com/ClickHouse/ClickHouse/pull/41095) ([Nikolay Degterinsky](https://github.com/evillique)). @@ -32,23 +32,23 @@ * Added `multiplyDecimal` and `divideDecimal` functions for decimal operations with fixed precision. [#42438](https://github.com/ClickHouse/ClickHouse/pull/42438) ([Andrey Zvonov](https://github.com/zvonand)). * Added `system.moves` table with list of currently moving parts. [#42660](https://github.com/ClickHouse/ClickHouse/pull/42660) ([Sergei Trifonov](https://github.com/serxa)). * Add support for embedded Prometheus endpoint for ClickHouse Keeper. [#43087](https://github.com/ClickHouse/ClickHouse/pull/43087) ([Antonio Andelic](https://github.com/antonio2368)). -* Support numeric literals with `_` as separator as, for example, `1_000_000`. [#43925](https://github.com/ClickHouse/ClickHouse/pull/43925) ([jh0x](https://github.com/jh0x)). -* Added possibility to use array as a second parameter for `cutURLParameter` function. It will cut multiple parameters. Close [#6827](https://github.com/ClickHouse/ClickHouse/issues/6827). [#43788](https://github.com/ClickHouse/ClickHouse/pull/43788) ([Roman Vasin](https://github.com/rvasin)). +* Support numeric literals with `_` as the separator, for example, `1_000_000`. [#43925](https://github.com/ClickHouse/ClickHouse/pull/43925) ([jh0x](https://github.com/jh0x)). +* Added possibility to use an array as a second parameter for `cutURLParameter` function. It will cut multiple parameters. Close [#6827](https://github.com/ClickHouse/ClickHouse/issues/6827). [#43788](https://github.com/ClickHouse/ClickHouse/pull/43788) ([Roman Vasin](https://github.com/rvasin)). * Add a column with the expression of the index in the `system.data_skipping_indices` table. [#43308](https://github.com/ClickHouse/ClickHouse/pull/43308) ([Guillaume Tassery](https://github.com/YiuRULE)). -* Add column `engine_full` to system table `databases` so that users can access whole engine definition of database via system tables. [#43468](https://github.com/ClickHouse/ClickHouse/pull/43468) ([凌涛](https://github.com/lingtaolf)). -* New hash function [xxh3](https://github.com/Cyan4973/xxHash) added. 
Also performance of `xxHash32` and `xxHash64` improved on arm thanks to library update. [#43411](https://github.com/ClickHouse/ClickHouse/pull/43411) ([Nikita Taranov](https://github.com/nickitat)). +* Add column `engine_full` to system table `databases` so that users can access the entire engine definition of a database via system tables. [#43468](https://github.com/ClickHouse/ClickHouse/pull/43468) ([凌涛](https://github.com/lingtaolf)). +* New hash function [xxh3](https://github.com/Cyan4973/xxHash) added. Also, the performance of `xxHash32` and `xxHash64` are improved on ARM thanks to a library update. [#43411](https://github.com/ClickHouse/ClickHouse/pull/43411) ([Nikita Taranov](https://github.com/nickitat)). * Added support to define constraints for merge tree settings. For example you can forbid overriding the `storage_policy` by users. [#43903](https://github.com/ClickHouse/ClickHouse/pull/43903) ([Sergei Trifonov](https://github.com/serxa)). -* Add a new setting `input_format_json_read_objects_as_strings` that allows to parse nested JSON objects into Strings in all JSON input formats. This setting is disabled by default. [#44052](https://github.com/ClickHouse/ClickHouse/pull/44052) ([Kruglov Pavel](https://github.com/Avogar)). +* Add a new setting `input_format_json_read_objects_as_strings` that allows the parsing of nested JSON objects into Strings in all JSON input formats. This setting is disabled by default. [#44052](https://github.com/ClickHouse/ClickHouse/pull/44052) ([Kruglov Pavel](https://github.com/Avogar)). #### Experimental Feature -* Support deduplication for asynchronous inserts. Before this change async inserts don't support deduplication, because multiple small inserts will coexist in one inserted batch. Closes [#38075](https://github.com/ClickHouse/ClickHouse/issues/38075). [#43304](https://github.com/ClickHouse/ClickHouse/pull/43304) ([Han Fei](https://github.com/hanfei1991)). +* Support deduplication for asynchronous inserts. Before this change, async inserts did not support deduplication, because multiple small inserts coexisted in one inserted batch. Closes [#38075](https://github.com/ClickHouse/ClickHouse/issues/38075). [#43304](https://github.com/ClickHouse/ClickHouse/pull/43304) ([Han Fei](https://github.com/hanfei1991)). * Add support for cosine distance for the experimental Annoy (vector similarity search) index. [#42778](https://github.com/ClickHouse/ClickHouse/pull/42778) ([Filatenkov Artur](https://github.com/FArthur-cmd)). #### Performance Improvement -* Add settings `max_streams_for_merge_tree_reading` and `allow_asynchronous_read_from_io_pool_for_merge_tree`. Setting `max_streams_for_merge_tree_reading` limits the number of reading streams for MergeTree tables. Setting `allow_asynchronous_read_from_io_pool_for_merge_tree` enables background I/O pool to read from `MergeTree` tables. This may increase performance for I/O bound queries if used together with `max_streams_to_max_threads_ratio` or `max_streams_for_merge_tree_reading`. [#43260](https://github.com/ClickHouse/ClickHouse/pull/43260) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). This improves performance up to 100 times in case of high latency storage, low number of CPU and high number of data parts. -* Settings `merge_tree_min_rows_for_concurrent_read_for_remote_filesystem/merge_tree_min_bytes_for_concurrent_read_for_remote_filesystem` did not respect adaptive granularity. 
Fat rows did not decrease the number of read rows (as it was done for `merge_tree_min_rows_for_concurrent_read/merge_tree_min_bytes_for_concurrent_read`), which could lead to high memory usage when using remote filesystems. [#43965](https://github.com/ClickHouse/ClickHouse/pull/43965) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Optimized the number of list requests to ZooKeeper or ClickHouse Keeper when selecting a part to merge. Previously it could produce thousands of requests in some cases. Fixes [#43647](https://github.com/ClickHouse/ClickHouse/issues/43647). [#43675](https://github.com/ClickHouse/ClickHouse/pull/43675) ([Alexander Tokmakov](https://github.com/tavplubix)).
* The optimization is now skipped if `max_size_to_preallocate_for_aggregation` has too small a value. The default value of this setting was increased to `10^8`. [#43945](https://github.com/ClickHouse/ClickHouse/pull/43945) ([Nikita Taranov](https://github.com/nickitat)).
* Speed up server shutdown by avoiding the cleanup of old data parts, because it is unnecessary after https://github.com/ClickHouse/ClickHouse/pull/41145. [#43760](https://github.com/ClickHouse/ClickHouse/pull/43760) ([Sema Checherinda](https://github.com/CheSema)).
* Merging on initiator now uses the same memory-bound approach as merging of local aggregation results if `enable_memory_bound_merging_of_aggregation_results` is set. [#40879](https://github.com/ClickHouse/ClickHouse/pull/40879) ([Nikita Taranov](https://github.com/nickitat)).
* Keeper improvement: try syncing logs to disk in parallel with replication. 
[#43450](https://github.com/ClickHouse/ClickHouse/pull/43450) ([Antonio Andelic](https://github.com/antonio2368)).
@@ -56,25 +56,25 @@

#### Improvement
* Implement referential dependencies and use them to create tables in the correct order while restoring from a backup. [#43834](https://github.com/ClickHouse/ClickHouse/pull/43834) ([Vitaly Baranov](https://github.com/vitlibar)).
* Substitute UDFs in `CREATE` query to avoid failures during loading at startup. Additionally, UDFs can now be used as `DEFAULT` expressions for columns. [#43539](https://github.com/ClickHouse/ClickHouse/pull/43539) ([Antonio Andelic](https://github.com/antonio2368)).
* Change how the following queries delete parts: TRUNCATE TABLE, ALTER TABLE DROP PART, ALTER TABLE DROP PARTITION. Now, these queries make empty parts which cover the old parts. This makes the TRUNCATE query work without an exclusive lock, which means concurrent reads aren't locked. Durability is also achieved in all those queries. If the request succeeds, then no resurrected parts appear later. Note that atomicity is achieved only with transaction scope. [#41145](https://github.com/ClickHouse/ClickHouse/pull/41145) ([Sema Checherinda](https://github.com/CheSema)).
* `SET param_x` query no longer requires manual string serialization for the value of the parameter. For example, query `SET param_a = '[\'a\', \'b\']'` can now be written like `SET param_a = ['a', 'b']`. [#41874](https://github.com/ClickHouse/ClickHouse/pull/41874) ([Nikolay Degterinsky](https://github.com/evillique)).
* Show read rows in the progress indication while reading from STDIN in the client. Closes [#43423](https://github.com/ClickHouse/ClickHouse/issues/43423). [#43442](https://github.com/ClickHouse/ClickHouse/pull/43442) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Show progress bar while reading from s3 table function / engine. [#43454](https://github.com/ClickHouse/ClickHouse/pull/43454) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Progress bar will show both read and written rows. [#43496](https://github.com/ClickHouse/ClickHouse/pull/43496) ([Ilya Yatsishin](https://github.com/qoega)).
* `filesystemAvailable` and related functions support one optional argument with disk name, and change `filesystemFree` to `filesystemUnreserved`. Closes [#35076](https://github.com/ClickHouse/ClickHouse/issues/35076). 
[#42064](https://github.com/ClickHouse/ClickHouse/pull/42064) ([flynn](https://github.com/ucasfl)).
* Integration with LDAP: increased the default value of search_limit to 256, and added an LDAP server config option to change that to an arbitrary value. Closes: [#42276](https://github.com/ClickHouse/ClickHouse/issues/42276). [#42461](https://github.com/ClickHouse/ClickHouse/pull/42461) ([Vasily Nemkov](https://github.com/Enmk)).
* Allow the removal of sensitive information (see the `query_masking_rules` in the configuration file) from the exception messages as well. Resolves [#41418](https://github.com/ClickHouse/ClickHouse/issues/41418). [#42940](https://github.com/ClickHouse/ClickHouse/pull/42940) ([filimonov](https://github.com/filimonov)).
* Support queries like `SHOW FULL TABLES ...` for MySQL compatibility. [#43910](https://github.com/ClickHouse/ClickHouse/pull/43910) ([Filatenkov Artur](https://github.com/FArthur-cmd)).
* Keeper improvement: Add 4lw command `rqld` which can manually assign a node as leader. [#43026](https://github.com/ClickHouse/ClickHouse/pull/43026) ([JackyWoo](https://github.com/JackyWoo)).
* Apply connection timeout settings for Distributed async INSERT from the query. [#43156](https://github.com/ClickHouse/ClickHouse/pull/43156) ([Azat Khuzhin](https://github.com/azat)).
* The `unhex` function now supports `FixedString` arguments. [issue42369](https://github.com/ClickHouse/ClickHouse/issues/42369). [#43207](https://github.com/ClickHouse/ClickHouse/pull/43207) ([DR](https://github.com/freedomDR)).
* Priority is given to deleting completely expired parts according to the TTL rules, see [#42869](https://github.com/ClickHouse/ClickHouse/issues/42869). [#43222](https://github.com/ClickHouse/ClickHouse/pull/43222) ([zhongyuankai](https://github.com/zhongyuankai)).
* More precise and reactive CPU load indication in clickhouse-client. [#43307](https://github.com/ClickHouse/ClickHouse/pull/43307) ([Sergei Trifonov](https://github.com/serxa)).
* Support reading of subcolumns of nested types from storage `S3` and table function `s3` with formats `Parquet`, `Arrow` and `ORC`. [#43329](https://github.com/ClickHouse/ClickHouse/pull/43329) ([chen](https://github.com/xiedeyantu)).
* Add `table_uuid` column to the `system.parts` table. [#43404](https://github.com/ClickHouse/ClickHouse/pull/43404) ([Azat Khuzhin](https://github.com/azat)).
* Added a client option to display the number of locally processed rows in non-interactive mode (`--print-num-processed-rows`). [#43407](https://github.com/ClickHouse/ClickHouse/pull/43407) ([jh0x](https://github.com/jh0x)).
* Implement `aggregation-in-order` optimization on top of a query plan. It is enabled by default (but works only together with `optimize_aggregation_in_order`, which is disabled by default). 
Set `query_plan_aggregation_in_order = 0` to use previous AST-based version. [#43592](https://github.com/ClickHouse/ClickHouse/pull/43592) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Implement `aggregation-in-order` optimization on top of a query plan. It is enabled by default (but works only together with `optimize_aggregation_in_order`, which is disabled by default). Set `query_plan_aggregation_in_order = 0` to use the previous AST-based version. [#43592](https://github.com/ClickHouse/ClickHouse/pull/43592) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). * Allow to collect profile events with `trace_type = 'ProfileEvent'` to `system.trace_log` on each increment with current stack, profile event name and value of the increment. It can be enabled by the setting `trace_profile_events` and used to investigate performance of queries. [#43639](https://github.com/ClickHouse/ClickHouse/pull/43639) ([Anton Popov](https://github.com/CurtizJ)). * Add a new setting `input_format_max_binary_string_size` to limit string size in RowBinary format. [#43842](https://github.com/ClickHouse/ClickHouse/pull/43842) ([Kruglov Pavel](https://github.com/Avogar)). * When ClickHouse requests a remote HTTP server, and it returns an error, the numeric HTTP code was not displayed correctly in the exception message. Closes [#43919](https://github.com/ClickHouse/ClickHouse/issues/43919). [#43920](https://github.com/ClickHouse/ClickHouse/pull/43920) ([Alexey Milovidov](https://github.com/alexey-milovidov)). @@ -82,50 +82,50 @@ #### Build/Testing/Packaging Improvement -* Systemd integration now correctly notifies systemd that service is really started and is ready to server requests. [#43400](https://github.com/ClickHouse/ClickHouse/pull/43400) ([Коренберг Марк](https://github.com/socketpair)). -* If someone wants, they can build ClickHouse with OpenSSL instead of BoringSSL, and even use dynamic library. This type of build is unsupported and not recommended anyhow. It is not tested and therefore not secure. The use-case is to supply the FIPS 140-2 certified build of OpenSSL. [#43991](https://github.com/ClickHouse/ClickHouse/pull/43991) ([Boris Kuschel](https://github.com/bkuschel)). -* This is to upgrade the new `DeflateQpl` compression codec which has been implemented on previous PR (details: https://github.com/ClickHouse/ClickHouse/pull/39494). This patch improves codec on below aspects: 1. QPL v0.2.0 to QPL v0.3.0 [Intel® Query Processing Library (QPL)](https://github.com/intel/qpl) 2. Improve CMake file for fixing QPL build issues for QPL v0.3.0. 3. Link the QPL library with libaccel-config at build time instead of runtime loading on QPL v0.2.0 (dlopen) 4. Fixed log print issue in CompressionCodecDeflateQpl.cpp. [#44024](https://github.com/ClickHouse/ClickHouse/pull/44024) ([jasperzhu](https://github.com/jinjunzh)). +* Systemd integration now correctly notifies systemd that the service is really started and is ready to serve requests. [#43400](https://github.com/ClickHouse/ClickHouse/pull/43400) ([Коренберг Марк](https://github.com/socketpair)). +* If someone wants, they can build ClickHouse with OpenSSL instead of BoringSSL, and even use a dynamic library. This type of build is unsupported and not recommended. It is not tested and therefore may not be secure. The use-case is to supply the FIPS 140-2 certified build of OpenSSL. [#43991](https://github.com/ClickHouse/ClickHouse/pull/43991) ([Boris Kuschel](https://github.com/bkuschel)). 
+* Upgrade to the new `DeflateQpl` compression codec which has been implemented in a previous PR (details: https://github.com/ClickHouse/ClickHouse/pull/39494). This patch improves codec on below aspects: 1. QPL v0.2.0 to QPL v0.3.0 [Intel® Query Processing Library (QPL)](https://github.com/intel/qpl) 2. Improve CMake file for fixing QPL build issues for QPL v0.3.0. 3. Link the QPL library with libaccel-config at build time instead of runtime loading on QPL v0.2.0 (dlopen) 4. Fixed log print issue in CompressionCodecDeflateQpl.cpp. [#44024](https://github.com/ClickHouse/ClickHouse/pull/44024) ([jasperzhu](https://github.com/jinjunzh)).

#### Bug Fix (user-visible misbehavior in official stable or prestable release)

* Fixed a bug which could lead to a deadlock while using asynchronous inserts. [#43233](https://github.com/ClickHouse/ClickHouse/pull/43233) ([Anton Popov](https://github.com/CurtizJ)).
* Fix some incorrect logic in AST level optimization `optimize_normalize_count_variants`. [#43873](https://github.com/ClickHouse/ClickHouse/pull/43873) ([Duc Canh Le](https://github.com/canhld94)).
* Fix a case when mutations are not making progress when checksums do not match between replicas (e.g. caused by a change in data format on an upgrade). [#36877](https://github.com/ClickHouse/ClickHouse/pull/36877) ([nvartolomei](https://github.com/nvartolomei)).
* Fix the `skip_unavailable_shards` optimization which did not work with the `hdfsCluster` table function. [#43236](https://github.com/ClickHouse/ClickHouse/pull/43236) ([chen](https://github.com/xiedeyantu)).
* Fix `s3` support for the `?` wildcard. Closes [#42731](https://github.com/ClickHouse/ClickHouse/issues/42731). [#43253](https://github.com/ClickHouse/ClickHouse/pull/43253) ([chen](https://github.com/xiedeyantu)).
* Fix functions `arrayFirstOrNull` and `arrayLastOrNull` when the array contains `Nullable` elements. [#43274](https://github.com/ClickHouse/ClickHouse/pull/43274) ([Duc Canh Le](https://github.com/canhld94)).
* Fix incorrect `UserTimeMicroseconds`/`SystemTimeMicroseconds` accounting related to Kafka tables. [#42791](https://github.com/ClickHouse/ClickHouse/pull/42791) ([Azat Khuzhin](https://github.com/azat)).
* Do not suppress exceptions in `web` disks. Fix retries for the `web` disk. [#42800](https://github.com/ClickHouse/ClickHouse/pull/42800) ([Azat Khuzhin](https://github.com/azat)).
* Fixed (logical) race condition between inserts and dropping materialized views. A race condition happened when a Materialized View was dropped at the same time as an INSERT, where the MVs were present as a dependency of the insert at the beginning of the execution, but the table has been dropped by the time the insert chain tries to access it, producing either an `UNKNOWN_TABLE` or `TABLE_IS_DROPPED` exception, and stopping the insertion. After this change, we avoid these exceptions and just continue with the insert if the dependency is gone. [#43161](https://github.com/ClickHouse/ClickHouse/pull/43161) ([AlfVII](https://github.com/AlfVII)). 
+* Fixed (logical) race condition between inserts and dropping materialized views. A race condition happened when a Materialized View was dropped at the same time as an INSERT, where the MVs were present as a dependency of the insert at the begining of the execution, but the table has been dropped by the time the insert chain tries to access it, producing either an `UNKNOWN_TABLE` or `TABLE_IS_DROPPED` exception, and stopping the insertion. After this change, we avoid these exceptions and just continue with the insert if the dependency is gone. [#43161](https://github.com/ClickHouse/ClickHouse/pull/43161) ([AlfVII](https://github.com/AlfVII)). * Fix undefined behavior in the `quantiles` function, which might lead to uninitialized memory. Found by fuzzer. This closes [#44066](https://github.com/ClickHouse/ClickHouse/issues/44066). [#44067](https://github.com/ClickHouse/ClickHouse/pull/44067) ([Alexey Milovidov](https://github.com/alexey-milovidov)). * Additional check on zero uncompressed size is added to `CompressionCodecDelta`. [#43255](https://github.com/ClickHouse/ClickHouse/pull/43255) ([Nikita Taranov](https://github.com/nickitat)). * Flatten arrays from Parquet to avoid an issue with inconsistent data in arrays. These incorrect files can be generated by Apache Iceberg. [#43297](https://github.com/ClickHouse/ClickHouse/pull/43297) ([Arthur Passos](https://github.com/arthurpassos)). * Fix bad cast from `LowCardinality` column when using short circuit function execution. [#43311](https://github.com/ClickHouse/ClickHouse/pull/43311) ([Kruglov Pavel](https://github.com/Avogar)). * Fixed queries with `SAMPLE BY` with prewhere optimization on tables using `Merge` engine. [#43315](https://github.com/ClickHouse/ClickHouse/pull/43315) ([Antonio Andelic](https://github.com/antonio2368)). -* Check and compare the content of the `format_version` file in `MergeTreeData` so tables can be loaded even if the storage policy was changed. [#43328](https://github.com/ClickHouse/ClickHouse/pull/43328) ([Antonio Andelic](https://github.com/antonio2368)). +* Check and compare the content of the `format_version` file in `MergeTreeData` so that tables can be loaded even if the storage policy was changed. [#43328](https://github.com/ClickHouse/ClickHouse/pull/43328) ([Antonio Andelic](https://github.com/antonio2368)). * Fix possible (very unlikely) "No column to rollback" logical error during INSERT into `Buffer` tables. [#43336](https://github.com/ClickHouse/ClickHouse/pull/43336) ([Azat Khuzhin](https://github.com/azat)). * Fix a bug that allowed the parser to parse an unlimited amount of round brackets into one function if `allow_function_parameters` is set. [#43350](https://github.com/ClickHouse/ClickHouse/pull/43350) ([Nikolay Degterinsky](https://github.com/evillique)). * `MaterializeMySQL` (experimental feature) support DDL: `drop table t1, t2` and compatible with most of MySQL DROP DDL. [#43366](https://github.com/ClickHouse/ClickHouse/pull/43366) ([zzsmdfj](https://github.com/zzsmdfj)). -* `session_log` (experimental feature): Fixed the unability to log in (because of failure to create the session_log entry) in a very rare case of messed up setting profiles. [#42641](https://github.com/ClickHouse/ClickHouse/pull/42641) ([Vasily Nemkov](https://github.com/Enmk)). +* `session_log` (experimental feature): Fixed the inability to log in (because of failure to create the session_log entry) in a very rare case of messed up setting profiles. 
[#42641](https://github.com/ClickHouse/ClickHouse/pull/42641) ([Vasily Nemkov](https://github.com/Enmk)). * Fix possible `Cannot create non-empty column with type Nothing` in functions `if`/`multiIf`. Closes [#43356](https://github.com/ClickHouse/ClickHouse/issues/43356). [#43368](https://github.com/ClickHouse/ClickHouse/pull/43368) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix a bug when a row level filter uses default value of column. [#43387](https://github.com/ClickHouse/ClickHouse/pull/43387) ([Alexander Gololobov](https://github.com/davenger)). +* Fix a bug when a row level filter uses the default value of a column. [#43387](https://github.com/ClickHouse/ClickHouse/pull/43387) ([Alexander Gololobov](https://github.com/davenger)). * Query with `DISTINCT` + `LIMIT BY` + `LIMIT` can return fewer rows than expected. Fixes [#43377](https://github.com/ClickHouse/ClickHouse/issues/43377). [#43410](https://github.com/ClickHouse/ClickHouse/pull/43410) ([Igor Nikonov](https://github.com/devcrafter)). * Fix `sumMap` for `Nullable(Decimal(...))`. [#43414](https://github.com/ClickHouse/ClickHouse/pull/43414) ([Azat Khuzhin](https://github.com/azat)). * Fix `date_diff` for hour/minute on macOS. Close [#42742](https://github.com/ClickHouse/ClickHouse/issues/42742). [#43466](https://github.com/ClickHouse/ClickHouse/pull/43466) ([zzsmdfj](https://github.com/zzsmdfj)). * Fix incorrect memory accounting because of merges/mutations. [#43516](https://github.com/ClickHouse/ClickHouse/pull/43516) ([Azat Khuzhin](https://github.com/azat)). * Fixed primary key analysis with conditions involving `toString(enum)`. [#43596](https://github.com/ClickHouse/ClickHouse/pull/43596) ([Nikita Taranov](https://github.com/nickitat)). This error has been found by @tisonkun. -* Ensure consistency when `clickhouse-copier` update status and `attach_is_done` in keeper after partition attach is done. [#43602](https://github.com/ClickHouse/ClickHouse/pull/43602) ([lzydmxy](https://github.com/lzydmxy)). -* During recovering of the lost replica of a `Replicated` database (experimental feature) there could a situation where we need to atomically swap two table names (use EXCHANGE), but instead previously we tried to use two RENAME queries. Which was obviously failed and moreover failed the whole recovery process of the database replica. [#43628](https://github.com/ClickHouse/ClickHouse/pull/43628) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). -* Fix the case when `s3Cluster` function throws `NOT_FOUND_COLUMN_IN_BLOCK` error. Closes [#43534](https://github.com/ClickHouse/ClickHouse/issues/43534). [#43629](https://github.com/ClickHouse/ClickHouse/pull/43629) ([chen](https://github.com/xiedeyantu)). -* Fix posssible logical error `Array sizes mismatched` while parsing JSON object with arrays with same key names but with different nesting level. Closes [#43569](https://github.com/ClickHouse/ClickHouse/issues/43569). [#43693](https://github.com/ClickHouse/ClickHouse/pull/43693) ([Kruglov Pavel](https://github.com/Avogar)). -* Fixed possible exception in case of distributed `GROUP BY` with an `ALIAS` column among aggregation keys. [#43709](https://github.com/ClickHouse/ClickHouse/pull/43709) ([Nikita Taranov](https://github.com/nickitat)). +* Ensure consistency when `clickhouse-copier` updates status and `attach_is_done` in Keeper after partition attach is done. [#43602](https://github.com/ClickHouse/ClickHouse/pull/43602) ([lzydmxy](https://github.com/lzydmxy)). 
* During the recovery of a lost replica of a `Replicated` database (experimental feature), there could be a situation where we need to atomically swap two table names (use EXCHANGE). Previously, we tried to use two RENAME queries, which obviously failed and, moreover, failed the whole recovery process of the database replica. [#43628](https://github.com/ClickHouse/ClickHouse/pull/43628) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
* Fix the case when the `s3Cluster` function throws `NOT_FOUND_COLUMN_IN_BLOCK` error. Closes [#43534](https://github.com/ClickHouse/ClickHouse/issues/43534). [#43629](https://github.com/ClickHouse/ClickHouse/pull/43629) ([chen](https://github.com/xiedeyantu)).
* Fix possible logical error `Array sizes mismatched` while parsing JSON objects with arrays with the same key names but with different nesting levels. Closes [#43569](https://github.com/ClickHouse/ClickHouse/issues/43569). [#43693](https://github.com/ClickHouse/ClickHouse/pull/43693) ([Kruglov Pavel](https://github.com/Avogar)).
* Fixed possible exception in the case of distributed `GROUP BY` with an `ALIAS` column among aggregation keys. [#43709](https://github.com/ClickHouse/ClickHouse/pull/43709) ([Nikita Taranov](https://github.com/nickitat)).
* Fix a bug which can lead to broken projections if zero-copy replication (experimental feature) is enabled and used. [#43764](https://github.com/ClickHouse/ClickHouse/pull/43764) ([alesapin](https://github.com/alesapin)).
* Fix using multipart upload for very large S3 objects in AWS S3. [#43824](https://github.com/ClickHouse/ClickHouse/pull/43824) ([ianton-ru](https://github.com/ianton-ru)).
* Fixed `ALTER ... RESET SETTING` with `ON CLUSTER`. It could have been applied to one replica only. Fixes [#43843](https://github.com/ClickHouse/ClickHouse/issues/43843). [#43848](https://github.com/ClickHouse/ClickHouse/pull/43848) ([Elena Torró](https://github.com/elenatorro)).
* Fix a logical error in JOIN with the `Join` table engine on the right-hand side, if `USING` is being used. [#43963](https://github.com/ClickHouse/ClickHouse/pull/43963) ([Vladimir C](https://github.com/vdimir)). Fix a bug with the wrong order of keys in the `Join` table engine. [#44012](https://github.com/ClickHouse/ClickHouse/pull/44012) ([Vladimir C](https://github.com/vdimir)).
* Keeper fix: throw if the interserver port for Raft is already in use. [#43984](https://github.com/ClickHouse/ClickHouse/pull/43984) ([Antonio Andelic](https://github.com/antonio2368)).
* Fix ORDER BY positional argument (example: `ORDER BY 1, 2`) in case of unneeded columns pruning from subqueries. Closes [#43964](https://github.com/ClickHouse/ClickHouse/issues/43964). [#43987](https://github.com/ClickHouse/ClickHouse/pull/43987) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fixed exception when a subquery contains HAVING but doesn't contain an actual aggregation. 
[#44051](https://github.com/ClickHouse/ClickHouse/pull/44051) ([Nikita Taranov](https://github.com/nickitat)).
* Fix race in s3 multipart upload. This race could cause the error `Part number must be an integer between 1 and 10000, inclusive. (S3_ERROR)` while restoring from a backup. [#44065](https://github.com/ClickHouse/ClickHouse/pull/44065) ([Vitaly Baranov](https://github.com/vitlibar)).

@@ -651,30 +651,30 @@
* Add counters (ProfileEvents) for cases when a query complexity limitation has been set and has been reached (a separate counter for `overflow_mode` = `break` and `throw`). For example, if you have set up `max_rows_to_read` with `read_overflow_mode = 'break'`, looking at the value of the `OverflowBreak` counter will allow distinguishing incomplete results. [#40205](https://github.com/ClickHouse/ClickHouse/pull/40205) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix memory accounting in case of "Memory limit exceeded" errors (previously [peak] memory usage took failed allocations into account). [#40249](https://github.com/ClickHouse/ClickHouse/pull/40249) ([Azat Khuzhin](https://github.com/azat)).
* Add metrics for filesystem cache: `FilesystemCacheSize` and `FilesystemCacheElements`. [#40260](https://github.com/ClickHouse/ClickHouse/pull/40260) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Support Hadoop secure RPC transfer (hadoop.rpc.protection=privacy and hadoop.rpc.protection=integrity). [#39411](https://github.com/ClickHouse/ClickHouse/pull/39411) ([michael1589](https://github.com/michael1589)).
* Avoid continuously growing memory consumption of pattern cache when using functions multi(Fuzzy)Match(Any|AllIndices|AnyIndex)(). [#40264](https://github.com/ClickHouse/ClickHouse/pull/40264) ([Robert Schulze](https://github.com/rschu1ze)).
* Add cache for schema inference for file/s3/hdfs/url table functions. Now, schema inference will be performed only on the first query to the file, all subsequent queries to the same file will use the schema from the cache if data has not changed. Add system table system.schema_inference_cache with all current schemas in cache and system queries SYSTEM DROP SCHEMA CACHE [FOR FILE/S3/HDFS/URL] to drop schemas from cache. [#38286](https://github.com/ClickHouse/ClickHouse/pull/38286) ([Kruglov Pavel](https://github.com/Avogar)).
* Add support for LARGE_BINARY/LARGE_STRING with Arrow (Closes [#32401](https://github.com/ClickHouse/ClickHouse/issues/32401)). [#40293](https://github.com/ClickHouse/ClickHouse/pull/40293) ([Josh Taylor](https://github.com/joshuataylor)).

#### Build/Testing/Packaging Improvement
* [ClickFiddle](https://fiddle.clickhouse.com/): A new tool for testing ClickHouse versions in read/write mode (**Igor Baliuk**). 
* ClickHouse binary is made self-extracting [#35775](https://github.com/ClickHouse/ClickHouse/pull/35775) ([Yakov Olkhovskiy, Arthur Filatenkov](https://github.com/yakov-olkhovskiy)). -* Update tzdata to 2022b to support the new timezone changes. See https://github.com/google/cctz/pull/226. Chile's 2022 DST start is delayed from September 4 to September 11. Iran plans to stop observing DST permanently, after it falls back on 2022-09-21. There are corrections of the historical time zone of Asia/Tehran in the year 1977: Iran adopted standard time in 1935, not 1946. In 1977 it observed DST from 03-21 23:00 to 10-20 24:00; its 1978 transitions were on 03-24 and 08-05, not 03-20 and 10-20; and its spring 1979 transition was on 05-27, not 03-21 (https://data.iana.org/time-zones/tzdb/NEWS). ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Former packages used to install systemd.service file to `/etc`. The files there are marked as `conf` and are not cleaned out, and not updated automatically. This PR cleans them out. [#39323](https://github.com/ClickHouse/ClickHouse/pull/39323) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Update `tzdata` to 2022b to support the new timezone changes. See https://github.com/google/cctz/pull/226. Chile's 2022 DST start is delayed from September 4 to September 11. Iran plans to stop observing DST permanently after it falls back on 2022-09-21. There are corrections to the historical time zone of Asia/Tehran in the year 1977: Iran adopted standard time in 1935, not 1946. In 1977 it observed DST from 03-21 23:00 to 10-20 24:00; its 1978 transitions were on 03-24 and 08-05, not 03-20 and 10-20; and its spring 1979 transition was on 05-27, not 03-21 (https://data.iana.org/time-zones/tzdb/NEWS). ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Former packages used to install systemd.service file to `/etc`. The files there are marked as `conf` and are not cleaned out, and are not updated automatically. This PR cleans them out. [#39323](https://github.com/ClickHouse/ClickHouse/pull/39323) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). * Ensure LSan is effective. [#39430](https://github.com/ClickHouse/ClickHouse/pull/39430) ([Azat Khuzhin](https://github.com/azat)). * TSAN has issues with clang-14 (https://github.com/google/sanitizers/issues/1552, https://github.com/google/sanitizers/issues/1540), so here we build the TSAN binaries with clang-15. [#39450](https://github.com/ClickHouse/ClickHouse/pull/39450) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). * Remove the option to build ClickHouse tools as separate executable programs. This fixes [#37847](https://github.com/ClickHouse/ClickHouse/issues/37847). [#39520](https://github.com/ClickHouse/ClickHouse/pull/39520) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Small preparations for build on s390x (which is big-endian). [#39627](https://github.com/ClickHouse/ClickHouse/pull/39627) ([Harry Lee](https://github.com/HarryLeeIBM)). [#39656](https://github.com/ClickHouse/ClickHouse/pull/39656) ([Harry Lee](https://github.com/HarryLeeIBM)). Fixed Endian issue in BitHelpers for s390x. [#39656](https://github.com/ClickHouse/ClickHouse/pull/39656) ([Harry Lee](https://github.com/HarryLeeIBM)). Implement a piece of code related to SipHash for s390x architecture (which is not supported by ClickHouse). [#39732](https://github.com/ClickHouse/ClickHouse/pull/39732) ([Harry Lee](https://github.com/HarryLeeIBM)). 
Fixed an Endian issue in Coordination snapshot code for s390x architecture (which is not supported by ClickHouse). [#39931](https://github.com/ClickHouse/ClickHouse/pull/39931) ([Harry Lee](https://github.com/HarryLeeIBM)). Fixed Endian issues in Codec code for s390x architecture (which is not supported by ClickHouse). [#40008](https://github.com/ClickHouse/ClickHouse/pull/40008) ([Harry Lee](https://github.com/HarryLeeIBM)). Fixed Endian issues in reading/writing BigEndian binary data in ReadHelpers and WriteHelpers code for s390x architecture (which is not supported by ClickHouse). [#40179](https://github.com/ClickHouse/ClickHouse/pull/40179) ([Harry Lee](https://github.com/HarryLeeIBM)). +* Small preparations for build on s390x (which is big-endian). [#39627](https://github.com/ClickHouse/ClickHouse/pull/39627) ([Harry Lee](https://github.com/HarryLeeIBM)). [#39656](https://github.com/ClickHouse/ClickHouse/pull/39656) ([Harry Lee](https://github.com/HarryLeeIBM)). Fixed Endian issue in BitHelpers for s390x. [#39656](https://github.com/ClickHouse/ClickHouse/pull/39656) ([Harry Lee](https://github.com/HarryLeeIBM)). Implement a piece of code related to SipHash for s390x architecture (which is not supported by ClickHouse). [#39732](https://github.com/ClickHouse/ClickHouse/pull/39732) ([Harry Lee](https://github.com/HarryLeeIBM)). Fixed an Endian issue in the Coordination snapshot code for s390x architecture (which is not supported by ClickHouse). [#39931](https://github.com/ClickHouse/ClickHouse/pull/39931) ([Harry Lee](https://github.com/HarryLeeIBM)). Fixed Endian issues in Codec code for s390x architecture (which is not supported by ClickHouse). [#40008](https://github.com/ClickHouse/ClickHouse/pull/40008) ([Harry Lee](https://github.com/HarryLeeIBM)). Fixed Endian issues in reading/writing BigEndian binary data in ReadHelpers and WriteHelpers code for s390x architecture (which is not supported by ClickHouse). [#40179](https://github.com/ClickHouse/ClickHouse/pull/40179) ([Harry Lee](https://github.com/HarryLeeIBM)). * Support build with `clang-16` (trunk). This closes [#39949](https://github.com/ClickHouse/ClickHouse/issues/39949). [#40181](https://github.com/ClickHouse/ClickHouse/pull/40181) ([Alexey Milovidov](https://github.com/alexey-milovidov)). * Prepare RISC-V 64 build to run in CI. This is for [#40141](https://github.com/ClickHouse/ClickHouse/issues/40141). [#40197](https://github.com/ClickHouse/ClickHouse/pull/40197) ([Alexey Milovidov](https://github.com/alexey-milovidov)). * Simplified function registration macro interface (`FUNCTION_REGISTER*`) to eliminate the step to add and call an extern function in the registerFunctions.cpp, it also makes incremental builds of a new function faster. [#38615](https://github.com/ClickHouse/ClickHouse/pull/38615) ([Li Yin](https://github.com/liyinsg)). -* Docker: Now entrypoint.sh in docker image creates and executes chown for all folders it found in config for multidisk setup [#17717](https://github.com/ClickHouse/ClickHouse/issues/17717). [#39121](https://github.com/ClickHouse/ClickHouse/pull/39121) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Docker: Now entrypoint.sh in docker image creates and executes chown for all folders it finds in the config for multidisk setup [#17717](https://github.com/ClickHouse/ClickHouse/issues/17717). [#39121](https://github.com/ClickHouse/ClickHouse/pull/39121) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). #### Bug Fix -* Fix possible segfault in `CapnProto` input format. 
This bug was found and send through ClickHouse bug-bounty [program](https://github.com/ClickHouse/ClickHouse/issues/38986) by *kiojj*. [#40241](https://github.com/ClickHouse/ClickHouse/pull/40241) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix a very rare case of incorrect behavior of array subscript operator. This closes [#28720](https://github.com/ClickHouse/ClickHouse/issues/28720). [#40185](https://github.com/ClickHouse/ClickHouse/pull/40185) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix possible segfault in `CapnProto` input format. This bug was found and sent in through the ClickHouse bug-bounty [program](https://github.com/ClickHouse/ClickHouse/issues/38986) by *kiojj*. [#40241](https://github.com/ClickHouse/ClickHouse/pull/40241) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix a very rare case of incorrect behavior of the array subscript operator. This closes [#28720](https://github.com/ClickHouse/ClickHouse/issues/28720). [#40185](https://github.com/ClickHouse/ClickHouse/pull/40185) ([Alexey Milovidov](https://github.com/alexey-milovidov)). * Fix insufficient argument check for encryption functions (found by query fuzzer). This closes [#39987](https://github.com/ClickHouse/ClickHouse/issues/39987). [#40194](https://github.com/ClickHouse/ClickHouse/pull/40194) ([Alexey Milovidov](https://github.com/alexey-milovidov)). * Fix the case when the order of columns can be incorrect if the `IN` operator is used with a table with `ENGINE = Set` containing multiple columns. This fixes [#13014](https://github.com/ClickHouse/ClickHouse/issues/13014). [#40225](https://github.com/ClickHouse/ClickHouse/pull/40225) ([Alexey Milovidov](https://github.com/alexey-milovidov)). * Fix seeking while reading from encrypted disk. This PR fixes [#38381](https://github.com/ClickHouse/ClickHouse/issues/38381). [#39687](https://github.com/ClickHouse/ClickHouse/pull/39687) ([Vitaly Baranov](https://github.com/vitlibar)). From 6ac4586577ecb3a733f56d07c1cc00384bb31bb4 Mon Sep 17 00:00:00 2001 From: DanRoscigno Date: Wed, 14 Dec 2022 10:21:14 -0500 Subject: [PATCH 30/35] clarify FIPS change is for testing --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1393eb7b1d0..ed898c238b7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -83,7 +83,7 @@ #### Build/Testing/Packaging Improvement * Systemd integration now correctly notifies systemd that the service is really started and is ready to serve requests. [#43400](https://github.com/ClickHouse/ClickHouse/pull/43400) ([Коренберг Марк](https://github.com/socketpair)). -* If someone wants, they can build ClickHouse with OpenSSL instead of BoringSSL, and even use a dynamic library. This type of build is unsupported and not recommended. It is not tested and therefore may not be secure. The use-case is to supply the FIPS 140-2 certified build of OpenSSL. [#43991](https://github.com/ClickHouse/ClickHouse/pull/43991) ([Boris Kuschel](https://github.com/bkuschel)). +* Added the option to build ClickHouse with OpenSSL using the OpenSSL FIPS Module (https://www.openssl.org/docs/man3.0/man7/fips_module.html). This build type has not been tested to validate security and is not supported. [#43991](https://github.com/ClickHouse/ClickHouse/pull/43991) ([Boris Kuschel](https://github.com/bkuschel)). * Upgrade to the new `DeflateQpl` compression codec which has been implemented in a previous PR (details: https://github.com/ClickHouse/ClickHouse/pull/39494). 
This patch improves codec on below aspects: 1. QPL v0.2.0 to QPL v0.3.0 [Intel® Query Processing Library (QPL)](https://github.com/intel/qpl) 2. Improve CMake file for fixing QPL build issues for QPL v0.3.0. 3. Link the QPL library with libaccel-config at build time instead of runtime loading on QPL v0.2.0 (dlopen) 4. Fixed log print issue in CompressionCodecDeflateQpl.cpp. [#44024](https://github.com/ClickHouse/ClickHouse/pull/44024) ([jasperzhu](https://github.com/jinjunzh)). #### Bug Fix (user-visible misbehavior in official stable or prestable release) From e19be861be6b3d7653c530da275cb009b6fed833 Mon Sep 17 00:00:00 2001 From: DanRoscigno Date: Wed, 14 Dec 2022 10:24:55 -0500 Subject: [PATCH 31/35] clarify FIPS change is for testing --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ed898c238b7..8b2ba9ae40c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -83,7 +83,7 @@ #### Build/Testing/Packaging Improvement * Systemd integration now correctly notifies systemd that the service is really started and is ready to serve requests. [#43400](https://github.com/ClickHouse/ClickHouse/pull/43400) ([Коренберг Марк](https://github.com/socketpair)). -* Added the option to build ClickHouse with OpenSSL using the OpenSSL FIPS Module (https://www.openssl.org/docs/man3.0/man7/fips_module.html). This build type has not been tested to validate security and is not supported. [#43991](https://github.com/ClickHouse/ClickHouse/pull/43991) ([Boris Kuschel](https://github.com/bkuschel)). +* Added the option to build ClickHouse with OpenSSL using the [OpenSSL FIPS Module](https://www.openssl.org/docs/man3.0/man7/fips_module.html). This build type has not been tested to validate security and is not supported. [#43991](https://github.com/ClickHouse/ClickHouse/pull/43991) ([Boris Kuschel](https://github.com/bkuschel)). * Upgrade to the new `DeflateQpl` compression codec which has been implemented in a previous PR (details: https://github.com/ClickHouse/ClickHouse/pull/39494). This patch improves codec on below aspects: 1. QPL v0.2.0 to QPL v0.3.0 [Intel® Query Processing Library (QPL)](https://github.com/intel/qpl) 2. Improve CMake file for fixing QPL build issues for QPL v0.3.0. 3. Link the QPL library with libaccel-config at build time instead of runtime loading on QPL v0.2.0 (dlopen) 4. Fixed log print issue in CompressionCodecDeflateQpl.cpp. [#44024](https://github.com/ClickHouse/ClickHouse/pull/44024) ([jasperzhu](https://github.com/jinjunzh)). #### Bug Fix (user-visible misbehavior in official stable or prestable release) From c67dfc6968f5195297f2872cec4f38de3f542dbe Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 14 Dec 2022 21:46:43 +0300 Subject: [PATCH 32/35] Update CHANGELOG.md --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8b2ba9ae40c..d60b53a1f22 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,7 +24,6 @@ * Add `BSONEachRow` input/output format. In this format, ClickHouse formats/parses each row as a separate BSON document and each column is formatted/parsed as a single BSON field with the column name as the key. [#42033](https://github.com/ClickHouse/ClickHouse/pull/42033) ([mark-polokhov](https://github.com/mark-polokhov)). * Add `grace_hash` JOIN algorithm, it can be enabled with `SET join_algorithm = 'grace_hash'`. 

From c67dfc6968f5195297f2872cec4f38de3f542dbe Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Wed, 14 Dec 2022 21:46:43 +0300
Subject: [PATCH 32/35] Update CHANGELOG.md

---
 CHANGELOG.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 8b2ba9ae40c..d60b53a1f22 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -24,7 +24,6 @@
 * Add `BSONEachRow` input/output format. In this format, ClickHouse formats/parses each row as a separate BSON document and each column is formatted/parsed as a single BSON field with the column name as the key. [#42033](https://github.com/ClickHouse/ClickHouse/pull/42033) ([mark-polokhov](https://github.com/mark-polokhov)).
 * Add `grace_hash` JOIN algorithm; it can be enabled with `SET join_algorithm = 'grace_hash'`. [#38191](https://github.com/ClickHouse/ClickHouse/pull/38191) ([BigRedEye](https://github.com/BigRedEye), [Vladimir C](https://github.com/vdimir)).
 * Allow configuring password complexity rules and checks for creating and changing users. [#43719](https://github.com/ClickHouse/ClickHouse/pull/43719) ([Nikolay Degterinsky](https://github.com/evillique)).
-* Add `CREATE / ALTER / DROP NAMED COLLECTION` queries. [#43252](https://github.com/ClickHouse/ClickHouse/pull/43252) ([Kseniia Sumarokova](https://github.com/kssenii)). Restrict default access to named collections to the user defined in config. This requires that `show_named_collections = 1` is set to be able to see them. [#43325](https://github.com/ClickHouse/ClickHouse/pull/43325) ([Kseniia Sumarokova](https://github.com/kssenii)). The `system.named_collections` table is introduced [#43147](https://github.com/ClickHouse/ClickHouse/pull/43147) ([Kseniia Sumarokova](https://github.com/kssenii)).
 * Mask sensitive information in logs; mask secret parts in the output of queries `SHOW CREATE TABLE` and `SELECT FROM system.tables`. Also resolves [#41418](https://github.com/ClickHouse/ClickHouse/issues/41418). [#43227](https://github.com/ClickHouse/ClickHouse/pull/43227) ([Vitaly Baranov](https://github.com/vitlibar)).
 * Add `GROUP BY ALL` syntax: [#37631](https://github.com/ClickHouse/ClickHouse/issues/37631). [#42265](https://github.com/ClickHouse/ClickHouse/pull/42265) ([刘陶峰](https://github.com/taofengliu)).
 * Add `FROM table SELECT column` syntax. [#41095](https://github.com/ClickHouse/ClickHouse/pull/41095) ([Nikolay Degterinsky](https://github.com/evillique)).
@@ -43,6 +42,7 @@
 #### Experimental Feature
 * Support deduplication for asynchronous inserts. Before this change, async inserts did not support deduplication, because multiple small inserts coexisted in one inserted batch. Closes [#38075](https://github.com/ClickHouse/ClickHouse/issues/38075). [#43304](https://github.com/ClickHouse/ClickHouse/pull/43304) ([Han Fei](https://github.com/hanfei1991)).
 * Add support for cosine distance for the experimental Annoy (vector similarity search) index. [#42778](https://github.com/ClickHouse/ClickHouse/pull/42778) ([Filatenkov Artur](https://github.com/FArthur-cmd)).
+* Add `CREATE / ALTER / DROP NAMED COLLECTION` queries. [#43252](https://github.com/ClickHouse/ClickHouse/pull/43252) ([Kseniia Sumarokova](https://github.com/kssenii)). This feature is under development, and the queries have no effect as of version 22.12; this changelog entry is added only to avoid confusion. Restrict default access to named collections to the user defined in the config; seeing them requires that `show_named_collections = 1` is set. [#43325](https://github.com/ClickHouse/ClickHouse/pull/43325) ([Kseniia Sumarokova](https://github.com/kssenii)). The `system.named_collections` table was introduced in [#43147](https://github.com/ClickHouse/ClickHouse/pull/43147) ([Kseniia Sumarokova](https://github.com/kssenii)).
 
 #### Performance Improvement
 * Add settings `max_streams_for_merge_tree_reading` and `allow_asynchronous_read_from_io_pool_for_merge_tree`. Setting `max_streams_for_merge_tree_reading` limits the number of reading streams for MergeTree tables. Setting `allow_asynchronous_read_from_io_pool_for_merge_tree` enables a background I/O pool to read from `MergeTree` tables. This may increase performance for I/O-bound queries if used together with `max_streams_to_max_threads_ratio` or `max_streams_for_merge_tree_reading`. [#43260](https://github.com/ClickHouse/ClickHouse/pull/43260) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). This improves performance by up to 100 times for high-latency storage, a low number of CPUs, and a large number of data parts.
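
The `grace_hash` entry in the hunk above names only the setting that enables the algorithm, so a brief usage sketch may help. This is a minimal, hedged example: the tables and columns are hypothetical, and only `SET join_algorithm = 'grace_hash'` is confirmed by the changelog entry itself.

    -- Enable the grace hash join algorithm for the current session.
    SET join_algorithm = 'grace_hash';

    -- Hypothetical tables; any equi-join then runs through the selected algorithm.
    SELECT l.key, r.value
    FROM left_table AS l
    INNER JOIN right_table AS r ON l.key = r.key;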
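
Likewise, the Performance Improvement entry above introduces two settings that are meant to be combined. A short sketch under stated assumptions: the setting names come from the entry itself, while the value 64 and the table name are illustrative only.

    -- Let MergeTree reads go through the background I/O pool.
    SET allow_asynchronous_read_from_io_pool_for_merge_tree = 1;
    -- Cap the number of reading streams; 64 is an arbitrary example value.
    SET max_streams_for_merge_tree_reading = 64;

    -- `hits` stands in for any large, I/O-bound MergeTree table.
    SELECT count() FROM hits;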

From 8870beb42e0d48ebc1af61360e2fbc86f52075e1 Mon Sep 17 00:00:00 2001
From: Dan Roscigno
Date: Wed, 14 Dec 2022 13:55:42 -0500
Subject: [PATCH 33/35] Update summingmergetree.md

---
 .../engines/table-engines/mergetree-family/summingmergetree.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/en/engines/table-engines/mergetree-family/summingmergetree.md b/docs/en/engines/table-engines/mergetree-family/summingmergetree.md
index 5a2c0718610..49f90d1c292 100644
--- a/docs/en/engines/table-engines/mergetree-family/summingmergetree.md
+++ b/docs/en/engines/table-engines/mergetree-family/summingmergetree.md
@@ -34,7 +34,7 @@ For a description of request parameters, see [request description](../../../sql-
 
 `columns` - a tuple with the names of columns where values will be summarized. Optional parameter. The columns must be of a numeric type and must not be in the primary key.
 
-    If `columns` not specified, ClickHouse summarizes the values in all columns with a numeric data type that are not in the primary key.
+    If `columns` is not specified, ClickHouse summarizes the values in all columns with a numeric data type that are not in the primary key.
 
 ### Query clauses

From 903780033e4e37c738b23a481493dd96d1ddac72 Mon Sep 17 00:00:00 2001
From: Alexander Tokmakov
Date: Wed, 14 Dec 2022 22:30:23 +0300
Subject: [PATCH 34/35] Update StorageSystemDatabases.cpp

---
 src/Storages/System/StorageSystemDatabases.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/Storages/System/StorageSystemDatabases.cpp b/src/Storages/System/StorageSystemDatabases.cpp
index a73559c2c10..457d8eabf3f 100644
--- a/src/Storages/System/StorageSystemDatabases.cpp
+++ b/src/Storages/System/StorageSystemDatabases.cpp
@@ -49,6 +49,7 @@ static String getEngineFull(const DatabasePtr & database)
             return {};
 
         guard.reset();
+        LOG_TRACE(&Poco::Logger::get("StorageSystemDatabases"), "Failed to lock database {} ({}), will retry", name, database->getUUID());
     }
 
     ASTPtr ast = database->getCreateDatabaseQuery();

From adefd4aa19dd88bd28838b93b4b68ffa2a2e8114 Mon Sep 17 00:00:00 2001
From: Yakov Olkhovskiy <99031427+yakov-olkhovskiy@users.noreply.github.com>
Date: Wed, 14 Dec 2022 17:01:15 -0500
Subject: [PATCH 35/35] include logger

---
 src/Storages/System/StorageSystemDatabases.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/Storages/System/StorageSystemDatabases.cpp b/src/Storages/System/StorageSystemDatabases.cpp
index 457d8eabf3f..2353be9b69f 100644
--- a/src/Storages/System/StorageSystemDatabases.cpp
+++ b/src/Storages/System/StorageSystemDatabases.cpp
@@ -5,6 +5,7 @@
 #include
 #include
 #include
 #include
+#include
 
 namespace DB
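
To illustrate the sentence fixed in PATCH 33 above: when the `columns` parameter is omitted, every numeric column outside the primary key is summed during merges. A minimal sketch with hypothetical table and column names:

    -- `hits` and `bytes` are numeric and not in the primary key, so they are
    -- summed on merge; `date` and `site_id` form the key and stay untouched.
    CREATE TABLE daily_traffic
    (
        date Date,
        site_id UInt32,
        hits UInt64,
        bytes UInt64
    )
    ENGINE = SummingMergeTree()
    ORDER BY (date, site_id);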
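
For PATCH 34 above: `getEngineFull` retries taking the DDL guard and, with this change, logs each failed attempt before retrying. Since the function lives in StorageSystemDatabases.cpp, its output can be observed through `system.databases`; note that `engine_full` being the column it populates is an assumption based on the function's name.

    -- `name` and `engine` are standard columns of system.databases;
    -- `engine_full` is assumed here to be the column getEngineFull feeds.
    SELECT name, engine, engine_full FROM system.databases;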