From b302fec362e6f1194fd6d8e2404d9ca618703e14 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 9 Oct 2019 22:58:50 +0300 Subject: [PATCH 01/10] Fixed bad code in transformQueryForExternalDatabase --- .../evaluateConstantExpression.cpp | 4 +- .../Interpreters/evaluateConstantExpression.h | 1 + .../transformQueryForExternalDatabase.cpp | 58 +++++++++++++------ 3 files changed, 44 insertions(+), 19 deletions(-) diff --git a/dbms/src/Interpreters/evaluateConstantExpression.cpp b/dbms/src/Interpreters/evaluateConstantExpression.cpp index 07a657fb7dd..f7843f682b7 100644 --- a/dbms/src/Interpreters/evaluateConstantExpression.cpp +++ b/dbms/src/Interpreters/evaluateConstantExpression.cpp @@ -60,11 +60,11 @@ std::pair> evaluateConstantExpression(co ASTPtr evaluateConstantExpressionAsLiteral(const ASTPtr & node, const Context & context) { - /// Branch with string in query. + /// If it's already a literal. if (node->as()) return node; - /// Branch with TableFunction in query. + /// Skip table functions. if (const auto * table_func_ptr = node->as()) if (TableFunctionFactory::instance().isTableFunctionName(table_func_ptr->name)) return node; diff --git a/dbms/src/Interpreters/evaluateConstantExpression.h b/dbms/src/Interpreters/evaluateConstantExpression.h index a901612040b..a84104c53f4 100644 --- a/dbms/src/Interpreters/evaluateConstantExpression.h +++ b/dbms/src/Interpreters/evaluateConstantExpression.h @@ -20,6 +20,7 @@ using ExpressionActionsPtr = std::shared_ptr; /** Evaluate constant expression and its type. * Used in rare cases - for elements of set for IN, for data to INSERT. + * Throws exception if it's not a constant expression. * Quite suboptimal. */ std::pair> evaluateConstantExpression(const ASTPtr & node, const Context & context); diff --git a/dbms/src/Storages/transformQueryForExternalDatabase.cpp b/dbms/src/Storages/transformQueryForExternalDatabase.cpp index b6e48836efa..a3dbe162075 100644 --- a/dbms/src/Storages/transformQueryForExternalDatabase.cpp +++ b/dbms/src/Storages/transformQueryForExternalDatabase.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include #include @@ -8,9 +9,12 @@ #include #include #include +#include #include #include +#include + namespace DB { @@ -20,31 +24,48 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } -static void replaceConstFunction(IAST & node, const Context & context, const NamesAndTypesList & all_columns) +namespace { - for (size_t i = 0; i < node.children.size(); ++i) - { - auto child = node.children[i]; - if (auto * exp_list = child->as()) - replaceConstFunction(*exp_list, context, all_columns); - if (auto * function = child->as()) +class ReplacingConstantExpressionsMatcher +{ +public: + using Data = Block; + + static bool needChildVisit(ASTPtr &, const ASTPtr &) + { + return true; + } + + static void visit(ASTPtr & node, Block & block_with_constants) + { + if (!node->as()) + return; + + std::string name = node->getColumnName(); + if (block_with_constants.has(name)) { - NamesAndTypesList source_columns = all_columns; - ASTPtr query = function->ptr(); - auto syntax_result = SyntaxAnalyzer(context).analyze(query, source_columns); - auto result_block = KeyCondition::getBlockWithConstants(query, syntax_result, context); - if (!result_block.has(child->getColumnName())) + auto result_column = block_with_constants.getByName(name).column; + if (!isColumnConst(*result_column)) return; - auto result_column = result_block.getByName(child->getColumnName()).column; - - node.children[i] = std::make_shared((*result_column)[0]); + node = std::make_shared(assert_cast(*result_column).getField()); + /// TODO: Date and DateTime must be represented by 'YYYY-MM-DD...', not as number. } } +}; + +void replaceConstantExpressions(ASTPtr & node, const Context & context, const NamesAndTypesList & all_columns) +{ + auto syntax_result = SyntaxAnalyzer(context).analyze(node, all_columns); + Block block_with_constants = KeyCondition::getBlockWithConstants(node, syntax_result, context); + + InDepthNodeVisitor visitor(block_with_constants); + visitor.visit(node); } -static bool isCompatible(const IAST & node) + +bool isCompatible(const IAST & node) { if (const auto * function = node.as()) { @@ -99,6 +120,8 @@ static bool isCompatible(const IAST & node) return false; } +} + String transformQueryForExternalDatabase( const IAST & query, @@ -131,7 +154,8 @@ String transformQueryForExternalDatabase( ASTPtr original_where = clone_query->as().where(); if (original_where) { - replaceConstFunction(*original_where, context, available_columns); + replaceConstantExpressions(original_where, context, available_columns); + if (isCompatible(*original_where)) { select->setExpression(ASTSelectQuery::Expression::WHERE, std::move(original_where)); From e65b57369e40f47609050a0a0f7f673131cd3562 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 9 Oct 2019 23:13:34 +0300 Subject: [PATCH 02/10] transformQueryForExternalDatabase: fixed constant folding for Date and DateTime --- .../transformQueryForExternalDatabase.cpp | 27 ++++++++++++++----- 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/dbms/src/Storages/transformQueryForExternalDatabase.cpp b/dbms/src/Storages/transformQueryForExternalDatabase.cpp index a3dbe162075..aab240dc070 100644 --- a/dbms/src/Storages/transformQueryForExternalDatabase.cpp +++ b/dbms/src/Storages/transformQueryForExternalDatabase.cpp @@ -10,11 +10,10 @@ #include #include #include +#include #include #include -#include - namespace DB { @@ -45,12 +44,28 @@ public: std::string name = node->getColumnName(); if (block_with_constants.has(name)) { - auto result_column = block_with_constants.getByName(name).column; - if (!isColumnConst(*result_column)) + auto result = block_with_constants.getByName(name); + if (!isColumnConst(*result.column)) return; - node = std::make_shared(assert_cast(*result_column).getField()); - /// TODO: Date and DateTime must be represented by 'YYYY-MM-DD...', not as number. + if (result.column->isNullAt(0)) + { + node = std::make_shared(Field()); + } + else if (isNumber(result.type)) + { + node = std::make_shared(assert_cast(*result.column).getField()); + } + else + { + /// Everything except numbers is put as string literal. This is important for Date, DateTime, UUID. + + const IColumn & inner_column = assert_cast(*result.column).getDataColumn(); + + WriteBufferFromOwnString out; + result.type->serializeAsText(inner_column, 0, out, FormatSettings()); + node = std::make_shared(out.str()); + } } } }; From 3fbac603288ba9962aa9fa33c6f8d0116ce963c1 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 9 Oct 2019 23:16:17 +0300 Subject: [PATCH 03/10] Added a test --- ...st_transform_query_for_external_database.cpp | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/dbms/src/Storages/tests/gtest_transform_query_for_external_database.cpp b/dbms/src/Storages/tests/gtest_transform_query_for_external_database.cpp index 34f6ce64278..797cb677d6a 100644 --- a/dbms/src/Storages/tests/gtest_transform_query_for_external_database.cpp +++ b/dbms/src/Storages/tests/gtest_transform_query_for_external_database.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -17,7 +18,13 @@ using namespace DB; struct State { Context context{Context::createGlobal()}; - NamesAndTypesList columns{{"column", std::make_shared()}}; + NamesAndTypesList columns{ + {"column", std::make_shared()}, + {"apply_id", std::make_shared()}, + {"apply_type", std::make_shared()}, + {"apply_status", std::make_shared()}, + {"create_time", std::make_shared()}, + }; State() { @@ -85,5 +92,11 @@ TEST(TransformQueryForExternalDatabase, MultipleAndSubqueries) check("SELECT column FROM test.table WHERE toString(column) = '42' AND left(column, 10) = RIGHT(column, 10) AND column = 42", "SELECT \"column\" FROM \"test\".\"table\" WHERE (\"column\" = 42)", state().context, state().columns); - +} + +TEST(TransformQueryForExternalDatabase, Issue7245) +{ + check("select apply_id from test.table where apply_type = 2 and create_time > addDays(toDateTime('2019-01-01 01:02:03'),-7) and apply_status in (3,4)", + "SELECT \"apply_id\", \"apply_type\", \"apply_status\", \"create_time\" FROM \"test\".\"table\" WHERE (\"apply_type\" = 2) AND (\"create_time\" > '2018-12-25 01:02:03') AND (\"apply_status\" IN (3, 4))", + state().context, state().columns); } From 03c7b7ff8d3649e5d553fd6d0d4a0afa9f1787aa Mon Sep 17 00:00:00 2001 From: Alexander Kazakov Date: Thu, 10 Oct 2019 17:38:08 +0300 Subject: [PATCH 04/10] Prevent from using non-deterministic functions in mutations of replicated tables (#7247) * Added isDeterministic() to IFunctionBuilder i-face * A test for non-deterministic mutations * In MutationsInterpreter::validate() ensure deterministic functions * Adjusted nondeterministic mutation tests * Moved around some code in MutationsInterpreter::validate() * Dropped unnecessary check in findFirstNonDeterministicFuncName() * Corrections to IFunction{Base,Builder} interface --- dbms/src/Functions/FunctionsConversion.h | 3 + dbms/src/Functions/FunctionsMiscellaneous.h | 6 ++ dbms/src/Functions/FunctionsRandom.h | 4 +- dbms/src/Functions/IFunction.h | 23 ++++- dbms/src/Functions/now.cpp | 5 +- dbms/src/Functions/randConstant.cpp | 3 + dbms/src/Functions/runningDifference.h | 1 + dbms/src/Functions/toTypeName.cpp | 3 + dbms/src/Functions/today.cpp | 4 + dbms/src/Functions/yesterday.cpp | 4 + .../src/Interpreters/MutationsInterpreter.cpp | 84 ++++++++++++++++++- ...eterministic_functions_zookeeper.reference | 4 + ...th_nondeterministic_functions_zookeeper.sh | 43 ++++++++++ 13 files changed, 179 insertions(+), 8 deletions(-) create mode 100644 dbms/tests/queries/0_stateless/01017_mutations_with_nondeterministic_functions_zookeeper.reference create mode 100755 dbms/tests/queries/0_stateless/01017_mutations_with_nondeterministic_functions_zookeeper.sh diff --git a/dbms/src/Functions/FunctionsConversion.h b/dbms/src/Functions/FunctionsConversion.h index c16f439c3fa..7f89ea2430f 100644 --- a/dbms/src/Functions/FunctionsConversion.h +++ b/dbms/src/Functions/FunctionsConversion.h @@ -1540,6 +1540,9 @@ public: String getName() const override { return name; } + bool isDeterministic() const override { return true; } + bool isDeterministicInScopeOfQuery() const override { return true; } + bool hasInformationAboutMonotonicity() const override { return static_cast(monotonicity_for_range); diff --git a/dbms/src/Functions/FunctionsMiscellaneous.h b/dbms/src/Functions/FunctionsMiscellaneous.h index 96539f9559f..de1177dd8ed 100644 --- a/dbms/src/Functions/FunctionsMiscellaneous.h +++ b/dbms/src/Functions/FunctionsMiscellaneous.h @@ -26,6 +26,9 @@ public: String getName() const override { return "FunctionExpression"; } + bool isDeterministic() const override { return true; } + bool isDeterministicInScopeOfQuery() const override { return true; } + const DataTypes & getArgumentTypes() const override { return argument_types; } const DataTypePtr & getReturnType() const override { return return_type; } @@ -110,6 +113,9 @@ public: String getName() const override { return name; } + bool isDeterministic() const override { return true; } + bool isDeterministicInScopeOfQuery() const override { return true; } + const DataTypes & getArgumentTypes() const override { return captured_types; } const DataTypePtr & getReturnType() const override { return return_type; } diff --git a/dbms/src/Functions/FunctionsRandom.h b/dbms/src/Functions/FunctionsRandom.h index 069c0afa86b..9559f6121fd 100644 --- a/dbms/src/Functions/FunctionsRandom.h +++ b/dbms/src/Functions/FunctionsRandom.h @@ -53,9 +53,11 @@ public: return name; } + bool isDeterministic() const override { return false; } + bool isDeterministicInScopeOfQuery() const override { return false; } + bool isVariadic() const override { return true; } size_t getNumberOfArguments() const override { return 0; } - bool isDeterministicInScopeOfQuery() const override { return false; } DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override { diff --git a/dbms/src/Functions/IFunction.h b/dbms/src/Functions/IFunction.h index a3b4c790926..04e9783a519 100644 --- a/dbms/src/Functions/IFunction.h +++ b/dbms/src/Functions/IFunction.h @@ -198,9 +198,9 @@ public: * Example: now(). Another example: functions that work with periodically updated dictionaries. */ - virtual bool isDeterministic() const { return true; } + virtual bool isDeterministic() const = 0; - virtual bool isDeterministicInScopeOfQuery() const { return true; } + virtual bool isDeterministicInScopeOfQuery() const = 0; /** Lets you know if the function is monotonic in a range of values. * This is used to work with the index in a sorted chunk of data. @@ -240,11 +240,16 @@ public: /// Get the main function name. virtual String getName() const = 0; + /// See the comment for the same method in IFunctionBase + virtual bool isDeterministic() const = 0; + + virtual bool isDeterministicInScopeOfQuery() const = 0; + /// Override and return true if function needs to depend on the state of the data. - virtual bool isStateful() const { return false; } + virtual bool isStateful() const = 0; /// Override and return true if function could take different number of arguments. - virtual bool isVariadic() const { return false; } + virtual bool isVariadic() const = 0; /// For non-variadic functions, return number of arguments; otherwise return zero (that should be ignored). virtual size_t getNumberOfArguments() const = 0; @@ -277,6 +282,11 @@ public: return buildImpl(arguments, getReturnType(arguments)); } + bool isDeterministic() const override { return true; } + bool isDeterministicInScopeOfQuery() const override { return true; } + bool isStateful() const override { return false; } + bool isVariadic() const override { return false; } + /// Default implementation. Will check only in non-variadic case. void checkNumberOfArguments(size_t number_of_arguments) const override; @@ -357,6 +367,8 @@ public: ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {}; } bool canBeExecutedOnDefaultArguments() const override { return true; } bool canBeExecutedOnLowCardinalityDictionary() const override { return isDeterministicInScopeOfQuery(); } + bool isDeterministic() const override { return true; } + bool isDeterministicInScopeOfQuery() const override { return true; } using PreparedFunctionImpl::execute; using PreparedFunctionImpl::executeImplDryRun; @@ -506,6 +518,9 @@ public: return function->checkNumberOfArguments(number_of_arguments); } + bool isDeterministic() const override { return function->isDeterministic(); } + bool isDeterministicInScopeOfQuery() const override { return function->isDeterministicInScopeOfQuery(); } + String getName() const override { return function->getName(); } bool isStateful() const override { return function->isStateful(); } bool isVariadic() const override { return function->isVariadic(); } diff --git a/dbms/src/Functions/now.cpp b/dbms/src/Functions/now.cpp index b5b4d9d7918..35ecf75e420 100644 --- a/dbms/src/Functions/now.cpp +++ b/dbms/src/Functions/now.cpp @@ -53,6 +53,7 @@ public: } bool isDeterministic() const override { return false; } + bool isDeterministicInScopeOfQuery() const override { return true; } private: time_t time_value; @@ -65,8 +66,10 @@ public: static constexpr auto name = "now"; String getName() const override { return name; } - size_t getNumberOfArguments() const override { return 0; } + bool isDeterministic() const override { return false; } + + size_t getNumberOfArguments() const override { return 0; } static FunctionBuilderPtr create(const Context &) { return std::make_shared(); } protected: diff --git a/dbms/src/Functions/randConstant.cpp b/dbms/src/Functions/randConstant.cpp index e854484154c..3bbb3f56b0e 100644 --- a/dbms/src/Functions/randConstant.cpp +++ b/dbms/src/Functions/randConstant.cpp @@ -50,6 +50,7 @@ public: } bool isDeterministic() const override { return false; } + bool isDeterministicInScopeOfQuery() const override { return true; } private: ToType value; @@ -64,6 +65,8 @@ public: static constexpr auto name = Name::name; String getName() const override { return name; } + bool isDeterministic() const override { return false; } + bool isVariadic() const override { return true; } size_t getNumberOfArguments() const override { return 0; } diff --git a/dbms/src/Functions/runningDifference.h b/dbms/src/Functions/runningDifference.h index 374142311e9..7cda3b0be94 100644 --- a/dbms/src/Functions/runningDifference.h +++ b/dbms/src/Functions/runningDifference.h @@ -141,6 +141,7 @@ public: return 1; } + bool isDeterministic() const override { return false; } bool isDeterministicInScopeOfQuery() const override { return false; diff --git a/dbms/src/Functions/toTypeName.cpp b/dbms/src/Functions/toTypeName.cpp index 55d602167a5..202274787bc 100644 --- a/dbms/src/Functions/toTypeName.cpp +++ b/dbms/src/Functions/toTypeName.cpp @@ -38,6 +38,9 @@ public: static constexpr auto name = "toTypeName"; String getName() const override { return name; } + bool isDeterministic() const override { return true; } + bool isDeterministicInScopeOfQuery() const override { return true; } + const DataTypes & getArgumentTypes() const override { return argument_types; } const DataTypePtr & getReturnType() const override { return return_type; } diff --git a/dbms/src/Functions/today.cpp b/dbms/src/Functions/today.cpp index 72e4867a950..8f692333170 100644 --- a/dbms/src/Functions/today.cpp +++ b/dbms/src/Functions/today.cpp @@ -52,6 +52,7 @@ public: } bool isDeterministic() const override { return false; } + bool isDeterministicInScopeOfQuery() const override { return true; } private: DayNum day_value; @@ -64,6 +65,9 @@ public: static constexpr auto name = "today"; String getName() const override { return name; } + + bool isDeterministic() const override { return false; } + size_t getNumberOfArguments() const override { return 0; } static FunctionBuilderPtr create(const Context &) { return std::make_shared(); } diff --git a/dbms/src/Functions/yesterday.cpp b/dbms/src/Functions/yesterday.cpp index 565a2c40913..88aa6896f5a 100644 --- a/dbms/src/Functions/yesterday.cpp +++ b/dbms/src/Functions/yesterday.cpp @@ -52,6 +52,7 @@ public: } bool isDeterministic() const override { return false; } + bool isDeterministicInScopeOfQuery() const override { return true; } private: DayNum day_value; @@ -64,6 +65,9 @@ public: static constexpr auto name = "yesterday"; String getName() const override { return name; } + + bool isDeterministic() const override { return false; } + size_t getNumberOfArguments() const override { return 0; } static FunctionBuilderPtr create(const Context &) { return std::make_shared(); } diff --git a/dbms/src/Interpreters/MutationsInterpreter.cpp b/dbms/src/Interpreters/MutationsInterpreter.cpp index a025ba87c1a..2641ab2a5c4 100644 --- a/dbms/src/Interpreters/MutationsInterpreter.cpp +++ b/dbms/src/Interpreters/MutationsInterpreter.cpp @@ -1,6 +1,11 @@ +#include "MutationsInterpreter.h" + +#include +#include +#include +#include #include #include -#include #include #include #include @@ -14,7 +19,6 @@ #include #include #include -#include "MutationsInterpreter.h" namespace DB @@ -27,6 +31,67 @@ namespace ErrorCodes extern const int CANNOT_UPDATE_COLUMN; } +namespace +{ +struct FirstNonDeterministicFuncData +{ + using TypeToVisit = ASTFunction; + + explicit FirstNonDeterministicFuncData(const Context & context_) + : context{context_} + {} + + const Context & context; + std::optional nondeterministic_function_name; + + void visit(ASTFunction & function, ASTPtr &) + { + if (nondeterministic_function_name) + return; + + const auto func = FunctionFactory::instance().get(function.name, context); + if (!func->isDeterministic()) + nondeterministic_function_name = func->getName(); + } +}; + +using FirstNonDeterministicFuncFinder = + InDepthNodeVisitor, true>; + +std::optional findFirstNonDeterministicFuncName(const MutationCommand & command, const Context & context) +{ + FirstNonDeterministicFuncData finder_data(context); + + switch (command.type) + { + case MutationCommand::UPDATE: + { + auto update_assignments_ast = command.ast->as().update_assignments->clone(); + FirstNonDeterministicFuncFinder(finder_data).visit(update_assignments_ast); + + if (finder_data.nondeterministic_function_name) + return finder_data.nondeterministic_function_name; + + [[fallthrough]]; + } + + case MutationCommand::DELETE: + { + auto predicate_ast = command.predicate->clone(); + FirstNonDeterministicFuncFinder(finder_data).visit(predicate_ast); + + return finder_data.nondeterministic_function_name; + } + + default: + break; + } + + return {}; +} +}; + + bool MutationsInterpreter::isStorageTouchedByMutations() const { if (commands.empty()) @@ -440,6 +505,21 @@ BlockInputStreamPtr MutationsInterpreter::addStreamsForLaterStages(const std::ve void MutationsInterpreter::validate(TableStructureReadLockHolder &) { + /// For Replicated* storages mutations cannot employ non-deterministic functions + /// because that produces inconsistencies between replicas + if (startsWith(storage->getName(), "Replicated")) + { + for (const auto & command : commands) + { + const auto nondeterministic_func_name = findFirstNonDeterministicFuncName(command, context); + if (nondeterministic_func_name) + throw Exception( + "ALTER UPDATE/ALTER DELETE statements must use only deterministic functions! " + "Function '" + *nondeterministic_func_name + "' is non-deterministic", + ErrorCodes::BAD_ARGUMENTS); + } + } + const auto & select_query = prepare(/* dry_run = */ true); InterpreterSelectQuery interpreter{select_query, context, storage, SelectQueryOptions().analyze(/* dry_run = */ true).ignoreLimits()}; /// Do not use getSampleBlock in order to check the whole pipeline. diff --git a/dbms/tests/queries/0_stateless/01017_mutations_with_nondeterministic_functions_zookeeper.reference b/dbms/tests/queries/0_stateless/01017_mutations_with_nondeterministic_functions_zookeeper.reference new file mode 100644 index 00000000000..b462a5a7baa --- /dev/null +++ b/dbms/tests/queries/0_stateless/01017_mutations_with_nondeterministic_functions_zookeeper.reference @@ -0,0 +1,4 @@ +OK +OK +OK +OK diff --git a/dbms/tests/queries/0_stateless/01017_mutations_with_nondeterministic_functions_zookeeper.sh b/dbms/tests/queries/0_stateless/01017_mutations_with_nondeterministic_functions_zookeeper.sh new file mode 100755 index 00000000000..ac66dbc352a --- /dev/null +++ b/dbms/tests/queries/0_stateless/01017_mutations_with_nondeterministic_functions_zookeeper.sh @@ -0,0 +1,43 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. $CURDIR/../shell_config.sh + + +R1=table_1017_1 +R2=table_1017_2 +T1=table_1017_merge + +${CLICKHOUSE_CLIENT} -n -q " + DROP TABLE IF EXISTS $R1; + DROP TABLE IF EXISTS $R2; + + CREATE TABLE $R1 (x UInt32, y UInt32) ENGINE ReplicatedMergeTree('/clickhouse/tables/${CLICKHOUSE_DATABASE}.table_1017', 'r1') ORDER BY x; + CREATE TABLE $R2 (x UInt32, y UInt32) ENGINE ReplicatedMergeTree('/clickhouse/tables/${CLICKHOUSE_DATABASE}.table_1017', 'r2') ORDER BY x; + CREATE TABLE $T1 (x UInt32, y UInt32) ENGINE MergeTree() ORDER BY x; + + INSERT INTO $R1 VALUES (0, 1)(1, 2)(2, 3)(3, 4); + INSERT INTO $T1 VALUES (0, 1)(1, 2)(2, 3)(3, 4); +" + +# Check that in mutations of replicated tables predicates do not contain non-deterministic functions +${CLICKHOUSE_CLIENT} --query "ALTER TABLE $R1 DELETE WHERE ignore(rand())" 2>&1 \ +| fgrep -q "must use only deterministic functions" && echo 'OK' || echo 'FAIL' + +${CLICKHOUSE_CLIENT} --query "ALTER TABLE $R1 UPDATE y = y + rand() % 1 WHERE not ignore()" 2>&1 \ +| fgrep -q "must use only deterministic functions" && echo 'OK' || echo 'FAIL' + + +# For regular tables we do not enforce deterministic functions +${CLICKHOUSE_CLIENT} --query "ALTER TABLE $T1 DELETE WHERE rand() = 0" 2>&1 > /dev/null \ +&& echo 'OK' || echo 'FAIL' + +${CLICKHOUSE_CLIENT} --query "ALTER TABLE $T1 UPDATE y = y + rand() % 1 WHERE not ignore()" 2>&1 > /dev/null \ +&& echo 'OK' || echo 'FAIL' + + +${CLICKHOUSE_CLIENT} -n -q " + DROP TABLE IF EXISTS $R2; + DROP TABLE IF EXISTS $R1; + DROP TABLE IF EXISTS $T1; +" From 742b9481567cd306c07cf962605ad4a18522e5bf Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 10 Oct 2019 19:30:33 +0300 Subject: [PATCH 05/10] Disable memory tracker for exception stack. --- dbms/src/Common/CurrentThread.cpp | 10 ++++++++++ dbms/src/Common/CurrentThread.h | 1 + dbms/src/Common/ThreadStatus.cpp | 2 +- dbms/src/Common/ThreadStatus.h | 27 ++++++++++++++++++++++++++ dbms/src/Interpreters/executeQuery.cpp | 5 +++++ 5 files changed, 44 insertions(+), 1 deletion(-) diff --git a/dbms/src/Common/CurrentThread.cpp b/dbms/src/Common/CurrentThread.cpp index ca39bec414c..db658c921bf 100644 --- a/dbms/src/Common/CurrentThread.cpp +++ b/dbms/src/Common/CurrentThread.cpp @@ -51,6 +51,16 @@ MemoryTracker * CurrentThread::getMemoryTracker() return ¤t_thread->memory_tracker; } +DisableMemoryTrackerGuard CurrentThread::temporaryDisableMemoryTracker() +{ + static MemoryTracker * no_tracker = nullptr; + + if (unlikely(!current_thread)) + return DisableMemoryTrackerGuard(no_tracker); + + return DisableMemoryTrackerGuard(current_thread->memory_tracker_ptr); +} + void CurrentThread::updateProgressIn(const Progress & value) { if (unlikely(!current_thread)) diff --git a/dbms/src/Common/CurrentThread.h b/dbms/src/Common/CurrentThread.h index 1e0140c6330..6954cbc1418 100644 --- a/dbms/src/Common/CurrentThread.h +++ b/dbms/src/Common/CurrentThread.h @@ -52,6 +52,7 @@ public: static ProfileEvents::Counters & getProfileEvents(); static MemoryTracker * getMemoryTracker(); + static DisableMemoryTrackerGuard temporaryDisableMemoryTracker(); static inline Int64 & getUntrackedMemory() { diff --git a/dbms/src/Common/ThreadStatus.cpp b/dbms/src/Common/ThreadStatus.cpp index ff6c23c1794..05101377dfd 100644 --- a/dbms/src/Common/ThreadStatus.cpp +++ b/dbms/src/Common/ThreadStatus.cpp @@ -32,7 +32,7 @@ TasksStatsCounters TasksStatsCounters::current() return res; } -ThreadStatus::ThreadStatus() +ThreadStatus::ThreadStatus() : memory_tracker_ptr(&memory_tracker) { thread_number = getThreadNumber(); os_thread_id = TaskStatsInfoGetter::getCurrentTID(); diff --git a/dbms/src/Common/ThreadStatus.h b/dbms/src/Common/ThreadStatus.h index 2ba55fa07d0..c321fca5d57 100644 --- a/dbms/src/Common/ThreadStatus.h +++ b/dbms/src/Common/ThreadStatus.h @@ -77,6 +77,31 @@ using ThreadGroupStatusPtr = std::shared_ptr; extern thread_local ThreadStatus * current_thread; + +/// Helper which allows to temporarily disable memory tracker. +/// Use parent memory tracker instead of current if can. +class DisableMemoryTrackerGuard +{ +public: + explicit DisableMemoryTrackerGuard(MemoryTracker *& memory_tracker_ptr_) + : memory_tracker_ptr(memory_tracker_ptr_) + , prev_value(memory_tracker_ptr ? memory_tracker_ptr->getParent() : nullptr) + { + if (memory_tracker_ptr) + std::swap(memory_tracker_ptr, prev_value); + } + + ~DisableMemoryTrackerGuard() + { + if (prev_value) + std::swap(memory_tracker_ptr, prev_value); + } + +private: + MemoryTracker *& memory_tracker_ptr; + MemoryTracker * prev_value = nullptr; +}; + /** Encapsulates all per-thread info (ProfileEvents, MemoryTracker, query_id, query context, etc.). * The object must be created in thread function and destroyed in the same thread before the exit. * It is accessed through thread-local pointer. @@ -99,6 +124,8 @@ public: /// TODO: merge them into common entity ProfileEvents::Counters performance_counters{VariableContext::Thread}; MemoryTracker memory_tracker{VariableContext::Thread}; + /// Pointer to memory tracker. Is initialized in constructor. May be temporary disabled (set to nullptr). + MemoryTracker * memory_tracker_ptr = nullptr; /// Small amount of untracked memory (per thread atomic-less counter) Int64 untracked_memory = 0; diff --git a/dbms/src/Interpreters/executeQuery.cpp b/dbms/src/Interpreters/executeQuery.cpp index ea27ab35968..a76167b2b3a 100644 --- a/dbms/src/Interpreters/executeQuery.cpp +++ b/dbms/src/Interpreters/executeQuery.cpp @@ -120,6 +120,11 @@ static void logQuery(const String & query, const Context & context, bool interna /// Call this inside catch block. static void setExceptionStackTrace(QueryLogElement & elem) { + /// Disable memory tracker for stack trace. + /// Because if exception is "Memory limit (for query) exceed", then we probably can't allocate another one string. + /// This memory will be tracked for user, so won't have memory leak. + auto guard = CurrentThread::temporaryDisableMemoryTracker(); + try { throw; From f011c1020e829e0e06ced58849a60a66f4775d2a Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 10 Oct 2019 20:37:34 +0300 Subject: [PATCH 06/10] Disable memory tracker for exception stack. --- dbms/src/Common/CurrentThread.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Common/CurrentThread.cpp b/dbms/src/Common/CurrentThread.cpp index db658c921bf..426f3f0e526 100644 --- a/dbms/src/Common/CurrentThread.cpp +++ b/dbms/src/Common/CurrentThread.cpp @@ -48,7 +48,7 @@ MemoryTracker * CurrentThread::getMemoryTracker() { if (unlikely(!current_thread)) return nullptr; - return ¤t_thread->memory_tracker; + return current_thread->memory_tracker_ptr; } DisableMemoryTrackerGuard CurrentThread::temporaryDisableMemoryTracker() From 495d1de46fb9e1e36583575e81e10372302f1eba Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Thu, 10 Oct 2019 21:47:51 +0300 Subject: [PATCH 07/10] clickhouse-test: introduce -U/--unified option (pass to diff(1)) --- dbms/tests/clickhouse-test | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dbms/tests/clickhouse-test b/dbms/tests/clickhouse-test index 28a41fc1f06..7048e8c9609 100755 --- a/dbms/tests/clickhouse-test +++ b/dbms/tests/clickhouse-test @@ -214,7 +214,7 @@ def run_tests_array(all_tests_with_params): result_is_different = subprocess.call(['diff', '-q', reference_file, stdout_file], stdout = PIPE) if result_is_different: - diff = Popen(['diff', '--unified', reference_file, stdout_file], stdout = PIPE).communicate()[0] + diff = Popen(['diff', '-U', str(args.unified), reference_file, stdout_file], stdout = PIPE).communicate()[0] failures += 1 print("{0} - result differs with reference:\n{1}".format(MSG_FAIL, diff)) else: @@ -456,6 +456,7 @@ if __name__ == '__main__': parser.add_argument('--database', help='Database for tests (random name test_XXXXXX by default)') parser.add_argument('--parallel', default='1/1', help='One parallel test run number/total') parser.add_argument('-j', '--jobs', default=1, nargs='?', type=int, help='Run all tests in parallel') + parser.add_argument('-U', '--unified', default=3, type=int, help='output NUM lines of unified context') parser.add_argument('--no-stateless', action='store_true', help='Disable all stateless tests') parser.add_argument('--no-stateful', action='store_true', help='Disable all stateful tests') From eb7d4797e36c177de82a250b60c9c6d92536c7b2 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Thu, 10 Oct 2019 21:47:51 +0300 Subject: [PATCH 08/10] tests/00212_shard_aggregate_function_uniq: break into sections --- ...12_shard_aggregate_function_uniq.reference | 22 ++++++++ .../00212_shard_aggregate_function_uniq.sql | 51 +++++++++++++++++-- 2 files changed, 68 insertions(+), 5 deletions(-) diff --git a/dbms/tests/queries/0_stateless/00212_shard_aggregate_function_uniq.reference b/dbms/tests/queries/0_stateless/00212_shard_aggregate_function_uniq.reference index 7ca0f2fb7be..2d8de749614 100644 --- a/dbms/tests/queries/0_stateless/00212_shard_aggregate_function_uniq.reference +++ b/dbms/tests/queries/0_stateless/00212_shard_aggregate_function_uniq.reference @@ -1,3 +1,4 @@ +uniqHLL12 1 1 3 1 6 1 @@ -50,6 +51,7 @@ 31 54151 35 54328 36 52997 +uniqHLL12 round(float) 0.125 1 0.5 1 0.05 1 @@ -102,6 +104,7 @@ 0.043 54620 0.037 53394 0.071 53951 +uniqHLL12 round(toFloat32()) 0.5 1 0.05 1 0.25 1 @@ -154,6 +157,7 @@ 0.037 53394 0.1 54138 1 54571 +uniqHLL12 IPv4NumToString 1 1 3 1 6 1 @@ -206,7 +210,9 @@ 31 53574 35 55022 36 53961 +uniqHLL12 remote() 1 +uniqCombined 1 1 3 1 6 1 @@ -259,6 +265,7 @@ 31 53948 35 53931 36 53982 +uniqCombined(12) 1 1 3 1 6 1 @@ -311,6 +318,7 @@ 31 53763 35 54635 36 53155 +uniqCombined(17) 1 1 3 1 6 1 @@ -363,6 +371,7 @@ 31 53948 35 53931 36 53982 +uniqCombined(20) 1 1 3 1 6 1 @@ -415,6 +424,7 @@ 31 54054 35 54054 36 54054 +uniqCombined(round(float)) 0.125 1 0.5 1 0.05 1 @@ -467,6 +477,7 @@ 0.043 54150 0.037 54047 0.071 53963 +uniqCombined(12)(round(float)) 0.125 1 0.5 1 0.05 1 @@ -519,6 +530,7 @@ 0.043 53827 0.037 53920 0.071 53409 +uniqCombined(17)(round(float)) 0.125 1 0.5 1 0.05 1 @@ -571,6 +583,7 @@ 0.043 54150 0.037 54047 0.071 53963 +uniqCombined(20)(round(float)) 0.125 1 0.5 1 0.05 1 @@ -623,6 +636,7 @@ 0.043 54053 0.037 54053 0.071 54054 +uniqCombined(X)(round(toFloat32())) 0.5 1 0.05 1 0.25 1 @@ -675,6 +689,7 @@ 0.037 54047 0.1 53853 1 53901 +uniqCombined(12)(round(toFloat32())) 0.5 1 0.05 1 0.25 1 @@ -727,6 +742,7 @@ 0.037 53920 0.1 53417 1 54708 +uniqCombined(17)(round(toFloat32())) 0.5 1 0.05 1 0.25 1 @@ -779,6 +795,7 @@ 0.037 54047 0.1 53853 1 53901 +uniqCombined(20)(round(toFloat32())) 0.5 1 0.05 1 0.25 1 @@ -831,6 +848,7 @@ 0.037 54053 0.1 54053 1 54054 +uniqCombined(Z)(IPv4NumToString) 1 1 3 1 6 1 @@ -883,6 +901,7 @@ 31 54074 35 54153 36 53999 +uniqCombined(12)(IPv4NumToString) 1 1 3 1 6 1 @@ -935,6 +954,7 @@ 31 55200 35 54808 36 53051 +uniqCombined(17)(IPv4NumToString) 1 1 3 1 6 1 @@ -987,6 +1007,7 @@ 31 54074 35 54153 36 53999 +uniqCombined(20)(IPv4NumToString) 1 1 3 1 6 1 @@ -1039,6 +1060,7 @@ 31 54054 35 54054 36 54054 +uniqCombined remote() 1 1 1 diff --git a/dbms/tests/queries/0_stateless/00212_shard_aggregate_function_uniq.sql b/dbms/tests/queries/0_stateless/00212_shard_aggregate_function_uniq.sql index ae54831b1af..afef71ae06d 100644 --- a/dbms/tests/queries/0_stateless/00212_shard_aggregate_function_uniq.sql +++ b/dbms/tests/queries/0_stateless/00212_shard_aggregate_function_uniq.sql @@ -1,93 +1,134 @@ -/* uniqHLL12 */ +-- uniqHLL12 + +SELECT 'uniqHLL12'; SELECT Y, uniqHLL12(X) FROM (SELECT number AS X, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 15) GROUP BY Y; SELECT Y, uniqHLL12(X) FROM (SELECT number AS X, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; SELECT Y, uniqHLL12(X) FROM (SELECT number AS X, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; +SELECT 'uniqHLL12 round(float)'; + SELECT Y, uniqHLL12(X) FROM (SELECT number AS X, round(1/(1 + (3*X*X - 7*X + 11) % 37), 3) AS Y FROM system.numbers LIMIT 15) GROUP BY Y; SELECT Y, uniqHLL12(X) FROM (SELECT number AS X, round(1/(1 + (3*X*X - 7*X + 11) % 37), 3) AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; SELECT Y, uniqHLL12(X) FROM (SELECT number AS X, round(1/(1 + (3*X*X - 7*X + 11) % 37), 3) AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; +SELECT 'uniqHLL12 round(toFloat32())'; + SELECT Y, uniqHLL12(X) FROM (SELECT number AS X, round(toFloat32(1/(1 + (3*X*X - 7*X + 11) % 37)), 3) AS Y FROM system.numbers LIMIT 15) GROUP BY Y; SELECT Y, uniqHLL12(X) FROM (SELECT number AS X, round(toFloat32(1/(1 + (3*X*X - 7*X + 11) % 37)), 3) AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; SELECT Y, uniqHLL12(X) FROM (SELECT number AS X, round(toFloat32(1/(1 + (3*X*X - 7*X + 11) % 37)), 3) AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; +SELECT 'uniqHLL12 IPv4NumToString'; + SELECT Y, uniqHLL12(Z) FROM (SELECT number AS X, IPv4NumToString(toUInt32(X)) AS Z, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 15) GROUP BY Y; SELECT Y, uniqHLL12(Z) FROM (SELECT number AS X, IPv4NumToString(toUInt32(X)) AS Z, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; SELECT Y, uniqHLL12(Z) FROM (SELECT number AS X, IPv4NumToString(toUInt32(X)) AS Z, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; +SELECT 'uniqHLL12 remote()'; + SELECT uniqHLL12(dummy) FROM remote('127.0.0.{2,3}', system.one); -/* uniqCombined */ +-- uniqCombined + +SELECT 'uniqCombined'; SELECT Y, uniqCombined(X) FROM (SELECT number AS X, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 15) GROUP BY Y; SELECT Y, uniqCombined(X) FROM (SELECT number AS X, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; SELECT Y, uniqCombined(X) FROM (SELECT number AS X, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; +SELECT 'uniqCombined(12)'; + SELECT Y, uniqCombined(12)(X) FROM (SELECT number AS X, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 15) GROUP BY Y; SELECT Y, uniqCombined(12)(X) FROM (SELECT number AS X, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; SELECT Y, uniqCombined(12)(X) FROM (SELECT number AS X, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; +SELECT 'uniqCombined(17)'; + SELECT Y, uniqCombined(17)(X) FROM (SELECT number AS X, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 15) GROUP BY Y; SELECT Y, uniqCombined(17)(X) FROM (SELECT number AS X, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; SELECT Y, uniqCombined(17)(X) FROM (SELECT number AS X, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; +SELECT 'uniqCombined(20)'; + SELECT Y, uniqCombined(20)(X) FROM (SELECT number AS X, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 15) GROUP BY Y; SELECT Y, uniqCombined(20)(X) FROM (SELECT number AS X, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; SELECT Y, uniqCombined(20)(X) FROM (SELECT number AS X, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; +SELECT 'uniqCombined(round(float))'; + SELECT Y, uniqCombined(X) FROM (SELECT number AS X, round(1/(1 + (3*X*X - 7*X + 11) % 37), 3) AS Y FROM system.numbers LIMIT 15) GROUP BY Y; SELECT Y, uniqCombined(X) FROM (SELECT number AS X, round(1/(1 + (3*X*X - 7*X + 11) % 37), 3) AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; SELECT Y, uniqCombined(X) FROM (SELECT number AS X, round(1/(1 + (3*X*X - 7*X + 11) % 37), 3) AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; +SELECT 'uniqCombined(12)(round(float))'; + SELECT Y, uniqCombined(12)(X) FROM (SELECT number AS X, round(1/(1 + (3*X*X - 7*X + 11) % 37), 3) AS Y FROM system.numbers LIMIT 15) GROUP BY Y; SELECT Y, uniqCombined(12)(X) FROM (SELECT number AS X, round(1/(1 + (3*X*X - 7*X + 11) % 37), 3) AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; SELECT Y, uniqCombined(12)(X) FROM (SELECT number AS X, round(1/(1 + (3*X*X - 7*X + 11) % 37), 3) AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; +SELECT 'uniqCombined(17)(round(float))'; + SELECT Y, uniqCombined(17)(X) FROM (SELECT number AS X, round(1/(1 + (3*X*X - 7*X + 11) % 37), 3) AS Y FROM system.numbers LIMIT 15) GROUP BY Y; SELECT Y, uniqCombined(17)(X) FROM (SELECT number AS X, round(1/(1 + (3*X*X - 7*X + 11) % 37), 3) AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; SELECT Y, uniqCombined(17)(X) FROM (SELECT number AS X, round(1/(1 + (3*X*X - 7*X + 11) % 37), 3) AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; +SELECT 'uniqCombined(20)(round(float))'; + SELECT Y, uniqCombined(20)(X) FROM (SELECT number AS X, round(1/(1 + (3*X*X - 7*X + 11) % 37), 3) AS Y FROM system.numbers LIMIT 15) GROUP BY Y; SELECT Y, uniqCombined(20)(X) FROM (SELECT number AS X, round(1/(1 + (3*X*X - 7*X + 11) % 37), 3) AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; SELECT Y, uniqCombined(20)(X) FROM (SELECT number AS X, round(1/(1 + (3*X*X - 7*X + 11) % 37), 3) AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; +SELECT 'uniqCombined(X)(round(toFloat32()))'; + SELECT Y, uniqCombined(X) FROM (SELECT number AS X, round(toFloat32(1/(1 + (3*X*X - 7*X + 11) % 37)), 3) AS Y FROM system.numbers LIMIT 15) GROUP BY Y; SELECT Y, uniqCombined(X) FROM (SELECT number AS X, round(toFloat32(1/(1 + (3*X*X - 7*X + 11) % 37)), 3) AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; SELECT Y, uniqCombined(X) FROM (SELECT number AS X, round(toFloat32(1/(1 + (3*X*X - 7*X + 11) % 37)), 3) AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; +SELECT 'uniqCombined(12)(round(toFloat32()))'; + SELECT Y, uniqCombined(12)(X) FROM (SELECT number AS X, round(toFloat32(1/(1 + (3*X*X - 7*X + 11) % 37)), 3) AS Y FROM system.numbers LIMIT 15) GROUP BY Y; SELECT Y, uniqCombined(12)(X) FROM (SELECT number AS X, round(toFloat32(1/(1 + (3*X*X - 7*X + 11) % 37)), 3) AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; SELECT Y, uniqCombined(12)(X) FROM (SELECT number AS X, round(toFloat32(1/(1 + (3*X*X - 7*X + 11) % 37)), 3) AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; +SELECT 'uniqCombined(17)(round(toFloat32()))'; + SELECT Y, uniqCombined(17)(X) FROM (SELECT number AS X, round(toFloat32(1/(1 + (3*X*X - 7*X + 11) % 37)), 3) AS Y FROM system.numbers LIMIT 15) GROUP BY Y; SELECT Y, uniqCombined(17)(X) FROM (SELECT number AS X, round(toFloat32(1/(1 + (3*X*X - 7*X + 11) % 37)), 3) AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; SELECT Y, uniqCombined(17)(X) FROM (SELECT number AS X, round(toFloat32(1/(1 + (3*X*X - 7*X + 11) % 37)), 3) AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; +SELECT 'uniqCombined(20)(round(toFloat32()))'; + SELECT Y, uniqCombined(20)(X) FROM (SELECT number AS X, round(toFloat32(1/(1 + (3*X*X - 7*X + 11) % 37)), 3) AS Y FROM system.numbers LIMIT 15) GROUP BY Y; SELECT Y, uniqCombined(20)(X) FROM (SELECT number AS X, round(toFloat32(1/(1 + (3*X*X - 7*X + 11) % 37)), 3) AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; SELECT Y, uniqCombined(20)(X) FROM (SELECT number AS X, round(toFloat32(1/(1 + (3*X*X - 7*X + 11) % 37)), 3) AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; +SELECT 'uniqCombined(Z)(IPv4NumToString)'; + SELECT Y, uniqCombined(Z) FROM (SELECT number AS X, IPv4NumToString(toUInt32(X)) AS Z, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 15) GROUP BY Y; SELECT Y, uniqCombined(Z) FROM (SELECT number AS X, IPv4NumToString(toUInt32(X)) AS Z, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; SELECT Y, uniqCombined(Z) FROM (SELECT number AS X, IPv4NumToString(toUInt32(X)) AS Z, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; +SELECT 'uniqCombined(12)(IPv4NumToString)'; + SELECT Y, uniqCombined(12)(Z) FROM (SELECT number AS X, IPv4NumToString(toUInt32(X)) AS Z, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 15) GROUP BY Y; SELECT Y, uniqCombined(12)(Z) FROM (SELECT number AS X, IPv4NumToString(toUInt32(X)) AS Z, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; SELECT Y, uniqCombined(12)(Z) FROM (SELECT number AS X, IPv4NumToString(toUInt32(X)) AS Z, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; +SELECT 'uniqCombined(17)(IPv4NumToString)'; + SELECT Y, uniqCombined(17)(Z) FROM (SELECT number AS X, IPv4NumToString(toUInt32(X)) AS Z, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 15) GROUP BY Y; SELECT Y, uniqCombined(17)(Z) FROM (SELECT number AS X, IPv4NumToString(toUInt32(X)) AS Z, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; SELECT Y, uniqCombined(17)(Z) FROM (SELECT number AS X, IPv4NumToString(toUInt32(X)) AS Z, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; +SELECT 'uniqCombined(20)(IPv4NumToString)'; + SELECT Y, uniqCombined(20)(Z) FROM (SELECT number AS X, IPv4NumToString(toUInt32(X)) AS Z, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 15) GROUP BY Y; SELECT Y, uniqCombined(20)(Z) FROM (SELECT number AS X, IPv4NumToString(toUInt32(X)) AS Z, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; SELECT Y, uniqCombined(20)(Z) FROM (SELECT number AS X, IPv4NumToString(toUInt32(X)) AS Z, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; +SELECT 'uniqCombined remote()'; + SELECT uniqCombined(dummy) FROM remote('127.0.0.{2,3}', system.one); - SELECT uniqCombined(12)(dummy) FROM remote('127.0.0.{2,3}', system.one); - SELECT uniqCombined(17)(dummy) FROM remote('127.0.0.{2,3}', system.one); - SELECT uniqCombined(20)(dummy) FROM remote('127.0.0.{2,3}', system.one); From e25bdc8d25656486a73493ab0724342f40eeb27b Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Thu, 10 Oct 2019 21:47:51 +0300 Subject: [PATCH 09/10] tests/00212_shard_aggregate_function_uniq: adjust values for uniqCombined(12)(String) In e373862c83 the maxium size for HashTable in CombinedCardinalityEstimator had been reduced for power of 2 for String (since otherwise the size of the hashtable becames bigger then the sizeof HLL). Refs: https://github.com/ClickHouse/ClickHouse/pull/7236#issuecomment-540496270 Fixes: e373862c83 ("Do not use more then 98K of memory for uniqCombined*") --- ...12_shard_aggregate_function_uniq.reference | 60 +++++++++---------- 1 file changed, 30 insertions(+), 30 deletions(-) diff --git a/dbms/tests/queries/0_stateless/00212_shard_aggregate_function_uniq.reference b/dbms/tests/queries/0_stateless/00212_shard_aggregate_function_uniq.reference index 2d8de749614..63686e2e352 100644 --- a/dbms/tests/queries/0_stateless/00212_shard_aggregate_function_uniq.reference +++ b/dbms/tests/queries/0_stateless/00212_shard_aggregate_function_uniq.reference @@ -916,24 +916,24 @@ uniqCombined(12)(IPv4NumToString) 31 1 35 1 36 1 -0 162 +0 161 1 162 3 162 6 162 -7 163 -9 163 +7 164 +9 162 10 81 -11 163 -13 162 -14 162 -17 162 -19 162 -20 162 -21 162 -22 162 +11 160 +13 163 +14 161 +17 159 +19 165 +20 163 +21 163 +22 158 26 162 31 162 -35 162 +35 164 36 162 0 52613 1 54468 @@ -1041,25 +1041,25 @@ uniqCombined(20)(IPv4NumToString) 31 162 35 162 36 162 -0 54054 -1 54054 -3 54054 -6 54054 -7 54054 -9 54054 +0 54038 +1 54104 +3 54033 +6 54084 +7 54081 +9 54093 10 27027 -11 54055 -13 54054 -14 54054 -17 54054 -19 54054 -20 54054 -21 54054 -22 54054 -26 54054 -31 54054 -35 54054 -36 54054 +11 54064 +13 54055 +14 54063 +17 54055 +19 53960 +20 54033 +21 53988 +22 54086 +26 54106 +31 54039 +35 54018 +36 54084 uniqCombined remote() 1 1 From 0ce86e1f8f42c3c32c557b004c1d842766cd8598 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 10 Oct 2019 21:55:16 +0300 Subject: [PATCH 10/10] Disable memory tracker for exception stack. --- dbms/src/Common/CurrentThread.cpp | 12 +----------- dbms/src/Common/CurrentThread.h | 1 - dbms/src/Common/ThreadStatus.cpp | 2 +- dbms/src/Common/ThreadStatus.h | 27 -------------------------- dbms/src/Interpreters/executeQuery.cpp | 3 +-- 5 files changed, 3 insertions(+), 42 deletions(-) diff --git a/dbms/src/Common/CurrentThread.cpp b/dbms/src/Common/CurrentThread.cpp index 426f3f0e526..ca39bec414c 100644 --- a/dbms/src/Common/CurrentThread.cpp +++ b/dbms/src/Common/CurrentThread.cpp @@ -48,17 +48,7 @@ MemoryTracker * CurrentThread::getMemoryTracker() { if (unlikely(!current_thread)) return nullptr; - return current_thread->memory_tracker_ptr; -} - -DisableMemoryTrackerGuard CurrentThread::temporaryDisableMemoryTracker() -{ - static MemoryTracker * no_tracker = nullptr; - - if (unlikely(!current_thread)) - return DisableMemoryTrackerGuard(no_tracker); - - return DisableMemoryTrackerGuard(current_thread->memory_tracker_ptr); + return ¤t_thread->memory_tracker; } void CurrentThread::updateProgressIn(const Progress & value) diff --git a/dbms/src/Common/CurrentThread.h b/dbms/src/Common/CurrentThread.h index 6954cbc1418..1e0140c6330 100644 --- a/dbms/src/Common/CurrentThread.h +++ b/dbms/src/Common/CurrentThread.h @@ -52,7 +52,6 @@ public: static ProfileEvents::Counters & getProfileEvents(); static MemoryTracker * getMemoryTracker(); - static DisableMemoryTrackerGuard temporaryDisableMemoryTracker(); static inline Int64 & getUntrackedMemory() { diff --git a/dbms/src/Common/ThreadStatus.cpp b/dbms/src/Common/ThreadStatus.cpp index 05101377dfd..ff6c23c1794 100644 --- a/dbms/src/Common/ThreadStatus.cpp +++ b/dbms/src/Common/ThreadStatus.cpp @@ -32,7 +32,7 @@ TasksStatsCounters TasksStatsCounters::current() return res; } -ThreadStatus::ThreadStatus() : memory_tracker_ptr(&memory_tracker) +ThreadStatus::ThreadStatus() { thread_number = getThreadNumber(); os_thread_id = TaskStatsInfoGetter::getCurrentTID(); diff --git a/dbms/src/Common/ThreadStatus.h b/dbms/src/Common/ThreadStatus.h index c321fca5d57..2ba55fa07d0 100644 --- a/dbms/src/Common/ThreadStatus.h +++ b/dbms/src/Common/ThreadStatus.h @@ -77,31 +77,6 @@ using ThreadGroupStatusPtr = std::shared_ptr; extern thread_local ThreadStatus * current_thread; - -/// Helper which allows to temporarily disable memory tracker. -/// Use parent memory tracker instead of current if can. -class DisableMemoryTrackerGuard -{ -public: - explicit DisableMemoryTrackerGuard(MemoryTracker *& memory_tracker_ptr_) - : memory_tracker_ptr(memory_tracker_ptr_) - , prev_value(memory_tracker_ptr ? memory_tracker_ptr->getParent() : nullptr) - { - if (memory_tracker_ptr) - std::swap(memory_tracker_ptr, prev_value); - } - - ~DisableMemoryTrackerGuard() - { - if (prev_value) - std::swap(memory_tracker_ptr, prev_value); - } - -private: - MemoryTracker *& memory_tracker_ptr; - MemoryTracker * prev_value = nullptr; -}; - /** Encapsulates all per-thread info (ProfileEvents, MemoryTracker, query_id, query context, etc.). * The object must be created in thread function and destroyed in the same thread before the exit. * It is accessed through thread-local pointer. @@ -124,8 +99,6 @@ public: /// TODO: merge them into common entity ProfileEvents::Counters performance_counters{VariableContext::Thread}; MemoryTracker memory_tracker{VariableContext::Thread}; - /// Pointer to memory tracker. Is initialized in constructor. May be temporary disabled (set to nullptr). - MemoryTracker * memory_tracker_ptr = nullptr; /// Small amount of untracked memory (per thread atomic-less counter) Int64 untracked_memory = 0; diff --git a/dbms/src/Interpreters/executeQuery.cpp b/dbms/src/Interpreters/executeQuery.cpp index a76167b2b3a..58ca97b42c1 100644 --- a/dbms/src/Interpreters/executeQuery.cpp +++ b/dbms/src/Interpreters/executeQuery.cpp @@ -122,8 +122,7 @@ static void setExceptionStackTrace(QueryLogElement & elem) { /// Disable memory tracker for stack trace. /// Because if exception is "Memory limit (for query) exceed", then we probably can't allocate another one string. - /// This memory will be tracked for user, so won't have memory leak. - auto guard = CurrentThread::temporaryDisableMemoryTracker(); + auto temporarily_disable_memory_tracker = getCurrentMemoryTrackerActionLock(); try {