From de30962ebd5d57d83ce53851b90492170b3fe29f Mon Sep 17 00:00:00 2001 From: hexiaoting Date: Wed, 21 Jul 2021 14:43:40 +0800 Subject: [PATCH 01/80] Fuse multi quantile funcs with same arguments to one function. --- src/Core/Settings.h | 1 + .../GatherFunctionQuantileVisitor.h | 82 +++++++++++++++++++ src/Interpreters/TreeOptimizer.cpp | 55 +++++++++++++ ...01956_fuse_quantile_optimization.reference | 70 ++++++++++++++++ .../01956_fuse_quantile_optimization.sql | 54 ++++++++++++ 5 files changed, 262 insertions(+) create mode 100644 src/Interpreters/GatherFunctionQuantileVisitor.h create mode 100644 tests/queries/0_stateless/01956_fuse_quantile_optimization.reference create mode 100644 tests/queries/0_stateless/01956_fuse_quantile_optimization.sql diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 77e6d0c674a..89f16ab24a0 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -440,6 +440,7 @@ class IColumn; M(Bool, enable_global_with_statement, true, "Propagate WITH statements to UNION queries and all subqueries", 0) \ M(Bool, aggregate_functions_null_for_empty, false, "Rewrite all aggregate functions in a query, adding -OrNull suffix to them", 0) \ M(Bool, optimize_fuse_sum_count_avg, false, "Fuse aggregate functions sum(), avg(), count() with identical arguments into one sumCount() call, if the query has at least two different functions", 0) \ + M(Bool, optimize_fuse_quantile, false, "Fuse multiply quantile-family fuctions with the same argument into quantilesXXX()[]", 0) \ M(Bool, flatten_nested, true, "If true, columns of type Nested will be flatten to separate array columns instead of one array of tuples", 0) \ M(Bool, asterisk_include_materialized_columns, false, "Include MATERIALIZED columns for wildcard query", 0) \ M(Bool, asterisk_include_alias_columns, false, "Include ALIAS columns for wildcard query", 0) \ diff --git a/src/Interpreters/GatherFunctionQuantileVisitor.h b/src/Interpreters/GatherFunctionQuantileVisitor.h new file mode 100644 index 00000000000..072e24a37fd --- /dev/null +++ b/src/Interpreters/GatherFunctionQuantileVisitor.h @@ -0,0 +1,82 @@ +#pragma once + +#include +#include +#include +#include + +namespace DB +{ + + static const std::unordered_map quantile_fuse_name_mapping = { + {NameQuantile::name, NameQuantiles::name}, + {NameQuantileDeterministic::name, NameQuantilesDeterministic::name}, + {NameQuantileExact::name, NameQuantilesExact::name}, + {NameQuantileExactLow::name, NameQuantilesExactLow::name}, + {NameQuantileExactHigh::name, NameQuantilesExactHigh::name}, + {NameQuantileExactExclusive::name, NameQuantilesExactExclusive::name}, + {NameQuantileExactInclusive::name, NameQuantilesExactInclusive::name}, + {NameQuantileExactWeighted::name, NameQuantilesExactWeighted::name}, + {NameQuantileTiming::name, NameQuantilesTiming::name}, + {NameQuantileTimingWeighted::name, NameQuantilesTimingWeighted::name}, + {NameQuantileTDigest::name, NameQuantilesTDigest::name}, + {NameQuantileTDigestWeighted::name, NameQuantilesTDigestWeighted::name}, + {NameQuantileBFloat16::name, NameQuantilesBFloat16::name} + }; + +/// Gather all the quantilexxx functions +class GatherFunctionQuantileData +{ +public: + struct FuseQuantileAggregatesData + { + std::unordered_map> arg_map_function; + void addFuncNode(ASTFunction * func) + { + auto argument = func->arguments->children.at(0)->getColumnName(); + + /// This functions needs two arguments. 
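For these two-argument variants the grouping key includes the second argument as well, so only calls that agree on both arguments are fused together. A quick illustration (hypothetical table t with columns d and w):

    SELECT quantileExactWeighted(0.2)(d, w), quantileExactWeighted(0.4)(d, w) FROM t;
    -- both calls map to the key "d,w" and are fusable into quantilesExactWeighted(0.2, 0.4)(d, w)
    SELECT quantileExactWeighted(0.2)(d, w), quantileExactWeighted(0.4)(d, 1) FROM t;
    -- keys "d,w" and "d,1" differ, so the two calls are left as separate aggregates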
+            if (func->name == NameQuantileDeterministic::name
+                || func->name == NameQuantileExactWeighted::name
+                || func->name == NameQuantileTimingWeighted::name
+                || func->name == NameQuantileTDigestWeighted::name)
+                argument = argument + "," + func->arguments->children.at(1)->getColumnName();
+
+            auto it = arg_map_function.find(argument);
+            if (it != arg_map_function.end())
+            {
+                it->second.push_back(func);
+            }
+            else
+            {
+                std::vector<ASTFunction *> new_func_list;
+                new_func_list.push_back(func);
+                arg_map_function[argument] = new_func_list;
+            }
+        }
+    };
+
+    using TypeToVisit = ASTFunction;
+    std::unordered_map<String, FuseQuantileAggregatesData> fuse_quantile;
+    void visit(ASTFunction & function, ASTPtr &)
+    {
+        if (quantile_fuse_name_mapping.find(function.name) == quantile_fuse_name_mapping.end())
+            return;
+
+        auto it = fuse_quantile.find(function.name);
+        if (it != fuse_quantile.end())
+        {
+            it->second.addFuncNode(&function);
+        }
+        else
+        {
+            FuseQuantileAggregatesData funcs{};
+            funcs.addFuncNode(&function);
+            fuse_quantile[function.name] = funcs;
+        }
+    }
+};
+
+using GatherFunctionQuantileVisitor = InDepthNodeVisitor<OneTypeMatcher<GatherFunctionQuantileData>, true>;
+
+}
diff --git a/src/Interpreters/TreeOptimizer.cpp b/src/Interpreters/TreeOptimizer.cpp
index c1a265d9a06..9493198d935 100644
--- a/src/Interpreters/TreeOptimizer.cpp
+++ b/src/Interpreters/TreeOptimizer.cpp
@@ -18,6 +18,7 @@
 #include
 #include
 #include
+#include <Interpreters/GatherFunctionQuantileVisitor.h>
 #include
 #include
 
@@ -39,6 +40,7 @@ namespace DB
 namespace ErrorCodes
 {
     extern const int LOGICAL_ERROR;
+    extern const int BAD_QUERY_PARAMETER;
 }
 
 namespace
@@ -587,6 +589,56 @@ void optimizeFunctionsToSubcolumns(ASTPtr & query, const StorageMetadataPtr & me
     RewriteFunctionToSubcolumnVisitor(data).visit(query);
 }
 
+/// Rewrites multiple quantile()() functions with the same arguments to quantiles()()[]
+/// e.g. SELECT quantile(0.5)(x), quantile(0.9)(x), quantile(0.95)(x) FROM ...
+/// rewrite to : SELECT quantiles(0.5, 0.9, 0.95)(x)[1], quantiles(0.5, 0.9, 0.95)(x)[2], quantiles(0.5, 0.9, 0.95)(x)[3] FROM ...
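A minimal sketch of the rewrite, mirroring the test added in this patch (t stands for any table with a column d; the EXPLAIN SYNTAX output is abridged to one line):

    SET optimize_fuse_quantile = true;
    EXPLAIN SYNTAX SELECT quantile(0.2)(d), quantile(0.3)(d) FROM t;
    -- SELECT quantiles(0.2, 0.3)(d)[1], quantiles(0.2, 0.3)(d)[2] FROM t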
+void fuseCandidate(std::unordered_map<String, GatherFunctionQuantileData::FuseQuantileAggregatesData> & fuse_quantile)
+{
+    for (auto candidate : fuse_quantile)
+    {
+        String func_name = candidate.first;
+        GatherFunctionQuantileData::FuseQuantileAggregatesData args_to_functions = candidate.second;
+
+        // Try to fuse multiply quantilexxx Function to one
+        for (auto it : args_to_functions.arg_map_function)
+        {
+            std::vector<ASTFunction *> functions = it.second;
+            size_t count = functions.size();
+            if (count > 1)
+            {
+                auto param_exp_list = std::make_shared<ASTExpressionList>();
+                for (auto func : functions)
+                {
+                    const ASTs & parameters = func->parameters->as<ASTExpressionList &>().children;
+                    assert(parameters.size() == 1);
+                    param_exp_list->children.push_back(parameters[0]);
+                }
+                functions[0]->parameters = param_exp_list;
+                functions[0]->name = quantile_fuse_name_mapping.find(func_name)->second;
+                auto func_base = functions[0]->clone();
+
+                for (size_t i = 0; i < count; ++i)
+                {
+                    functions[i]->name = "arrayElement";
+                    auto func_exp_list = std::make_shared<ASTExpressionList>();
+                    func_exp_list->children.push_back(func_base);
+                    func_exp_list->children.push_back(std::make_shared<ASTLiteral>(i + 1));
+                    functions[i]->children.clear();
+                    functions[i]->parameters = nullptr;
+                    functions[i]->arguments = func_exp_list;
+                    functions[i]->children.push_back(func_exp_list);
+                }
+            }
+        }
+    }
+}
+void optimizeFuseQuantileFunctions(ASTPtr & query)
+{
+    GatherFunctionQuantileVisitor::Data data{};
+    GatherFunctionQuantileVisitor(data).visit(query);
+    fuseCandidate(data.fuse_quantile);
+}
+
 }
 
 void TreeOptimizer::optimizeIf(ASTPtr & query, Aliases & aliases, bool if_chain_to_multiif)
@@ -684,6 +736,9 @@ void TreeOptimizer::apply(ASTPtr & query, TreeRewriterResult & result,
 
     /// Remove duplicated columns from USING(...).
     optimizeUsing(select_query);
+
+    if (settings.optimize_fuse_quantile)
+        optimizeFuseQuantileFunctions(query);
 }
 
 }
diff --git a/tests/queries/0_stateless/01956_fuse_quantile_optimization.reference b/tests/queries/0_stateless/01956_fuse_quantile_optimization.reference
new file mode 100644
index 00000000000..a0178759db7
--- /dev/null
+++ b/tests/queries/0_stateless/01956_fuse_quantile_optimization.reference
@@ -0,0 +1,70 @@
+2016-06-15 23:00:00 2016-06-15 23:00:00
+2016-06-15 23:00:00 2016-06-15 23:00:00
+2016-06-15 23:00:00 2016-06-15 23:00:00
+2016-06-15 23:00:00 2016-06-15 23:00:00 2016-06-15 23:00:00
+30000 30000 30000
+30000 30000 30000
+2016-06-15 23:00:16 2016-06-15 23:00:16 2016-06-15 23:00:16
+2016-06-15 23:00:16 2016-06-15 23:00:16 2016-06-15 23:00:16
+2016-04-02 17:23:12 2016-04-02 17:23:12 2016-04-02 17:23:12
+---------After fuse result-----------
+quantile:
+SELECT
+    quantiles(0.2, 0.3)(d)[1],
+    quantiles(0.2, 0.3)(d)[2]
+FROM datetime
+2016-06-15 23:00:00 2016-06-15 23:00:00
+quantileDeterministic:
+SELECT
+    quantilesDeterministic(0.2, 0.5)(d, 1)[1],
+    quantilesDeterministic(0.2, 0.5)(d, 1)[2]
+FROM datetime
+2016-06-15 23:00:00 2016-06-15 23:00:00
+quantileExact:
+SELECT
+    quantilesExact(0.2, 0.5)(d)[1],
+    quantilesExact(0.2, 0.5)(d)[2]
+FROM datetime
+2016-06-15 23:00:00 2016-06-15 23:00:00
+quantileExactWeighted:
+SELECT
+    quantilesExactWeighted(0.2, 0.4)(d, 1)[1],
+    quantilesExactWeighted(0.2, 0.4)(d, 1)[2],
+    quantileExactWeighted(0.3)(d, 2)
+FROM datetime
+2016-06-15 23:00:00 2016-06-15 23:00:00 2016-06-15 23:00:00
+quantileTiming:
+SELECT
+    quantilesTiming(0.2, 0.3)(d)[1],
+    quantilesTiming(0.2, 0.3)(d)[2],
+    quantileTiming(0.2)(d + 1)
+FROM datetime
+30000 30000 30000
+quantileTimingWeighted:
+SELECT
+    quantilesTimingWeighted(0.2, 0.3)(d, 1)[1],
+    quantilesTimingWeighted(0.2, 0.3)(d, 1)[2],
+    quantileTimingWeighted(0.2)(d, 
2) +FROM datetime +30000 30000 30000 +quantileTDigest: +SELECT + quantilesTDigest(0.2, 0.3)(d)[1], + quantilesTDigest(0.2, 0.3)(d)[2], + quantileTDigest(0.2)(d + 1) +FROM datetime +2016-06-15 23:00:16 2016-06-15 23:00:16 2016-06-15 23:00:16 +quantileTDigestWeighted: +SELECT + quantilesTDigestWeighted(0.2, 0.3)(d, 1)[1], + quantilesTDigestWeighted(0.2, 0.3)(d, 1)[2], + quantileTDigestWeighted(0.4)(d, 2) +FROM datetime +2016-06-15 23:00:16 2016-06-15 23:00:16 2016-06-15 23:00:16 +quantileBFloat16: +SELECT + quantilesBFloat16(0.2, 0.3)(d)[1], + quantilesBFloat16(0.2, 0.3)(d)[2], + quantileBFloat16(0.4)(d + 1) +FROM datetime +2016-04-02 17:23:12 2016-04-02 17:23:12 2016-04-02 17:23:12 diff --git a/tests/queries/0_stateless/01956_fuse_quantile_optimization.sql b/tests/queries/0_stateless/01956_fuse_quantile_optimization.sql new file mode 100644 index 00000000000..9fb72f6646b --- /dev/null +++ b/tests/queries/0_stateless/01956_fuse_quantile_optimization.sql @@ -0,0 +1,54 @@ +DROP TABLE IF EXISTS datetime; +CREATE TABLE datetime (d DateTime('UTC')) ENGINE = Memory; +INSERT INTO datetime(d) VALUES(toDateTime('2016-06-15 23:00:00', 'UTC')) + +SELECT quantile(0.2)(d), quantile(0.3)(d) FROM datetime; +SELECT quantileDeterministic(0.2)(d, 1), quantileDeterministic(0.5)(d, 1) FROM datetime; +SELECT quantileExact(0.2)(d), quantileExact(0.5)(d) FROM datetime; +SELECT quantileExactWeighted(0.2)(d, 1), quantileExactWeighted(0.4)(d, 1), quantileExactWeighted(0.3)(d, 2) FROM datetime; +SELECT quantileTiming(0.2)(d), quantileTiming(0.3)(d), quantileTiming(0.2)(d+1) FROM datetime; +SELECT quantileTimingWeighted(0.2)(d, 1), quantileTimingWeighted(0.3)(d, 1), quantileTimingWeighted(0.2)(d, 2) FROM datetime; +SELECT quantileTDigest(0.2)(d), quantileTDigest(0.3)(d), quantileTDigest(0.2)(d + 1) FROM datetime; +SELECT quantileTDigestWeighted(0.2)(d, 1), quantileTDigestWeighted(0.3)(d, 1), quantileTDigestWeighted(0.4)(d, 2) FROM datetime; +SELECT quantileBFloat16(0.2)(d), quantileBFloat16(0.3)(d), quantileBFloat16(0.4)(d + 1) FROM datetime; + + +SELECT '---------After fuse result-----------'; +set optimize_fuse_quantile=true; +SELECT 'quantile:'; +EXPLAIN SYNTAX SELECT quantile(0.2)(d), quantile(0.3)(d) FROM datetime; +SELECT quantile(0.2)(d), quantile(0.3)(d) FROM datetime; + +SELECT 'quantileDeterministic:'; +EXPLAIN SYNTAX SELECT quantileDeterministic(0.2)(d, 1), quantileDeterministic(0.5)(d, 1) FROM datetime; +SELECT quantileDeterministic(0.2)(d, 1), quantileDeterministic(0.5)(d, 1) FROM datetime; + +SELECT 'quantileExact:'; +EXPLAIN SYNTAX SELECT quantileExact(0.2)(d), quantileExact(0.5)(d) FROM datetime; +SELECT quantileExact(0.2)(d), quantileExact(0.5)(d) FROM datetime; + +SELECT 'quantileExactWeighted:'; +EXPLAIN SYNTAX SELECT quantileExactWeighted(0.2)(d, 1), quantileExactWeighted(0.4)(d, 1), quantileExactWeighted(0.3)(d, 2) FROM datetime; +SELECT quantileExactWeighted(0.2)(d, 1), quantileExactWeighted(0.4)(d, 1), quantileExactWeighted(0.3)(d, 2) FROM datetime; + +SELECT 'quantileTiming:'; +EXPLAIN SYNTAX SELECT quantileTiming(0.2)(d), quantileTiming(0.3)(d), quantileTiming(0.2)(d+1) FROM datetime; +SELECT quantileTiming(0.2)(d), quantileTiming(0.3)(d), quantileTiming(0.2)(d+1) FROM datetime; + +SELECT 'quantileTimingWeighted:'; +EXPLAIN SYNTAX SELECT quantileTimingWeighted(0.2)(d, 1), quantileTimingWeighted(0.3)(d, 1), quantileTimingWeighted(0.2)(d, 2) FROM datetime; +SELECT quantileTimingWeighted(0.2)(d, 1), quantileTimingWeighted(0.3)(d, 1), quantileTimingWeighted(0.2)(d, 2) FROM datetime; + +SELECT 
'quantileTDigest:'; +EXPLAIN SYNTAX SELECT quantileTDigest(0.2)(d), quantileTDigest(0.3)(d), quantileTDigest(0.2)(d + 1) FROM datetime; +SELECT quantileTDigest(0.2)(d), quantileTDigest(0.3)(d), quantileTDigest(0.2)(d + 1) FROM datetime; + +SELECT 'quantileTDigestWeighted:'; +EXPLAIN SYNTAX SELECT quantileTDigestWeighted(0.2)(d, 1), quantileTDigestWeighted(0.3)(d, 1), quantileTDigestWeighted(0.4)(d, 2) FROM datetime; +SELECT quantileTDigestWeighted(0.2)(d, 1), quantileTDigestWeighted(0.3)(d, 1), quantileTDigestWeighted(0.4)(d, 2) FROM datetime; + +SELECT 'quantileBFloat16:'; +EXPLAIN SYNTAX SELECT quantileBFloat16(0.2)(d), quantileBFloat16(0.3)(d), quantileBFloat16(0.4)(d + 1) FROM datetime; +SELECT quantileBFloat16(0.2)(d), quantileBFloat16(0.3)(d), quantileBFloat16(0.4)(d + 1) FROM datetime; + +DROP TABLE datetime; From 6b73fa530749771e9fff923878b0bff3a72b8e52 Mon Sep 17 00:00:00 2001 From: hexiaoting Date: Fri, 23 Jul 2021 10:57:34 +0800 Subject: [PATCH 02/80] Fix style check errors --- src/Core/Settings.h | 2 +- .../GatherFunctionQuantileVisitor.h | 31 +++++++++---------- src/Interpreters/TreeOptimizer.cpp | 16 +++++----- 3 files changed, 24 insertions(+), 25 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 89f16ab24a0..fe9aa44e9f7 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -440,7 +440,7 @@ class IColumn; M(Bool, enable_global_with_statement, true, "Propagate WITH statements to UNION queries and all subqueries", 0) \ M(Bool, aggregate_functions_null_for_empty, false, "Rewrite all aggregate functions in a query, adding -OrNull suffix to them", 0) \ M(Bool, optimize_fuse_sum_count_avg, false, "Fuse aggregate functions sum(), avg(), count() with identical arguments into one sumCount() call, if the query has at least two different functions", 0) \ - M(Bool, optimize_fuse_quantile, false, "Fuse multiply quantile-family fuctions with the same argument into quantilesXXX()[]", 0) \ + M(Bool, optimize_fuse_quantile, false, "Fuse multiply quantile-family functions with the same argument into quantilesXXX()[]", 0) \ M(Bool, flatten_nested, true, "If true, columns of type Nested will be flatten to separate array columns instead of one array of tuples", 0) \ M(Bool, asterisk_include_materialized_columns, false, "Include MATERIALIZED columns for wildcard query", 0) \ M(Bool, asterisk_include_alias_columns, false, "Include ALIAS columns for wildcard query", 0) \ diff --git a/src/Interpreters/GatherFunctionQuantileVisitor.h b/src/Interpreters/GatherFunctionQuantileVisitor.h index 072e24a37fd..3db65c4bb66 100644 --- a/src/Interpreters/GatherFunctionQuantileVisitor.h +++ b/src/Interpreters/GatherFunctionQuantileVisitor.h @@ -7,22 +7,21 @@ namespace DB { - - static const std::unordered_map quantile_fuse_name_mapping = { - {NameQuantile::name, NameQuantiles::name}, - {NameQuantileDeterministic::name, NameQuantilesDeterministic::name}, - {NameQuantileExact::name, NameQuantilesExact::name}, - {NameQuantileExactLow::name, NameQuantilesExactLow::name}, - {NameQuantileExactHigh::name, NameQuantilesExactHigh::name}, - {NameQuantileExactExclusive::name, NameQuantilesExactExclusive::name}, - {NameQuantileExactInclusive::name, NameQuantilesExactInclusive::name}, - {NameQuantileExactWeighted::name, NameQuantilesExactWeighted::name}, - {NameQuantileTiming::name, NameQuantilesTiming::name}, - {NameQuantileTimingWeighted::name, NameQuantilesTimingWeighted::name}, - {NameQuantileTDigest::name, NameQuantilesTDigest::name}, - {NameQuantileTDigestWeighted::name, 
NameQuantilesTDigestWeighted::name}, - {NameQuantileBFloat16::name, NameQuantilesBFloat16::name} - }; +static const std::unordered_map quantile_fuse_name_mapping = { + {NameQuantile::name, NameQuantiles::name}, + {NameQuantileDeterministic::name, NameQuantilesDeterministic::name}, + {NameQuantileExact::name, NameQuantilesExact::name}, + {NameQuantileExactLow::name, NameQuantilesExactLow::name}, + {NameQuantileExactHigh::name, NameQuantilesExactHigh::name}, + {NameQuantileExactExclusive::name, NameQuantilesExactExclusive::name}, + {NameQuantileExactInclusive::name, NameQuantilesExactInclusive::name}, + {NameQuantileExactWeighted::name, NameQuantilesExactWeighted::name}, + {NameQuantileTiming::name, NameQuantilesTiming::name}, + {NameQuantileTimingWeighted::name, NameQuantilesTimingWeighted::name}, + {NameQuantileTDigest::name, NameQuantilesTDigest::name}, + {NameQuantileTDigestWeighted::name, NameQuantilesTDigestWeighted::name}, + {NameQuantileBFloat16::name, NameQuantilesBFloat16::name} +}; /// Gather all the quantilexxx functions class GatherFunctionQuantileData diff --git a/src/Interpreters/TreeOptimizer.cpp b/src/Interpreters/TreeOptimizer.cpp index 9493198d935..48ccb900618 100644 --- a/src/Interpreters/TreeOptimizer.cpp +++ b/src/Interpreters/TreeOptimizer.cpp @@ -40,7 +40,6 @@ namespace DB namespace ErrorCodes { extern const int LOGICAL_ERROR; - extern const int BAD_QUERY_PARAMETER; } namespace @@ -617,16 +616,17 @@ void fuseCandidate(std::unordered_mapname = quantile_fuse_name_mapping.find(func_name)->second; auto func_base = functions[0]->clone(); - for (size_t i = 0; i < count; ++i) + size_t idx = 0; + for (auto func : functions) { - functions[i]->name = "arrayElement"; + func->name = "arrayElement"; auto func_exp_list = std::make_shared(); func_exp_list->children.push_back(func_base); - func_exp_list->children.push_back(std::make_shared(i + 1)); - functions[i]->children.clear(); - functions[i]->parameters = nullptr; - functions[i]->arguments = func_exp_list; - functions[i]->children.push_back(func_exp_list); + func_exp_list->children.push_back(std::make_shared(++idx)); + func->children.clear(); + func->parameters = nullptr; + func->arguments = func_exp_list; + func->children.push_back(func_exp_list); } } } From b060c0abb8f41e3d6525f3f6c4998d71b546513c Mon Sep 17 00:00:00 2001 From: hexiaoting Date: Fri, 23 Jul 2021 14:31:13 +0800 Subject: [PATCH 03/80] Fix style error --- src/Interpreters/TreeOptimizer.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Interpreters/TreeOptimizer.cpp b/src/Interpreters/TreeOptimizer.cpp index 48ccb900618..a23aeabb297 100644 --- a/src/Interpreters/TreeOptimizer.cpp +++ b/src/Interpreters/TreeOptimizer.cpp @@ -593,20 +593,20 @@ void optimizeFunctionsToSubcolumns(ASTPtr & query, const StorageMetadataPtr & me /// rewrite to : SELECT quantiles(0.5, 0.9, 0.95)(x)[1], quantiles(0.5, 0.9, 0.95)(x)[2], quantiles(0.5, 0.9, 0.95)(x)[3] FROM ... 
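A note on the fused form this function produces: every arrayElement call wraps the same quantiles(...) node, so the aggregate state is computed once per distinct argument set and only indexed several times, which is exactly what the expected EXPLAIN SYNTAX output in the reference file above shows:

    SELECT
        quantiles(0.2, 0.3)(d)[1],
        quantiles(0.2, 0.3)(d)[2]
    FROM datetime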
void fuseCandidate(std::unordered_map & fuse_quantile) { - for (auto candidate : fuse_quantile) + for (const auto & candidate : fuse_quantile) { String func_name = candidate.first; GatherFunctionQuantileData::FuseQuantileAggregatesData args_to_functions = candidate.second; // Try to fuse multiply quantilexxx Function to one - for (auto it : args_to_functions.arg_map_function) + for (const auto & it : args_to_functions.arg_map_function) { std::vector functions = it.second; size_t count = functions.size(); if (count > 1) { auto param_exp_list = std::make_shared(); - for (auto func : functions) + for (auto * func : functions) { const ASTs & parameters = func->parameters->as().children; assert(parameters.size() == 1); @@ -617,7 +617,7 @@ void fuseCandidate(std::unordered_mapclone(); size_t idx = 0; - for (auto func : functions) + for (auto * func : functions) { func->name = "arrayElement"; auto func_exp_list = std::make_shared(); From 1fe012bdfadd38840fd199067dacc5c4df8e78ed Mon Sep 17 00:00:00 2001 From: hexiaoting Date: Wed, 25 Aug 2021 16:24:53 +0800 Subject: [PATCH 04/80] Support Alias and rewrite some code --- .../GatherFunctionQuantileVisitor.h | 35 +++++-------------- src/Interpreters/TreeOptimizer.cpp | 32 +++++++---------- ...01956_fuse_quantile_optimization.reference | 20 +++++++++++ .../01956_fuse_quantile_optimization.sql | 4 +++ 4 files changed, 46 insertions(+), 45 deletions(-) diff --git a/src/Interpreters/GatherFunctionQuantileVisitor.h b/src/Interpreters/GatherFunctionQuantileVisitor.h index 3db65c4bb66..21411ec89ef 100644 --- a/src/Interpreters/GatherFunctionQuantileVisitor.h +++ b/src/Interpreters/GatherFunctionQuantileVisitor.h @@ -7,6 +7,7 @@ namespace DB { +/// Mapping from quantile functions for single value to plural static const std::unordered_map quantile_fuse_name_mapping = { {NameQuantile::name, NameQuantiles::name}, {NameQuantileDeterministic::name, NameQuantilesDeterministic::name}, @@ -23,15 +24,16 @@ static const std::unordered_map quantile_fuse_name_mapping = { {NameQuantileBFloat16::name, NameQuantilesBFloat16::name} }; -/// Gather all the quantilexxx functions +/// Gather all the `quantile*` functions class GatherFunctionQuantileData { public: struct FuseQuantileAggregatesData { - std::unordered_map> arg_map_function; - void addFuncNode(ASTFunction * func) + std::unordered_map> arg_map_function; + void addFuncNode(ASTPtr & ast) { + const auto * func = ast->as(); auto argument = func->arguments->children.at(0)->getColumnName(); /// This functions needs two arguments. 
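Patch 04 stores ASTPtr slots instead of raw ASTFunction pointers so that each original call can be swapped out wholesale while its alias is carried over via tryGetAlias/setAlias; roughly, for the aliased test case added below:

    SELECT quantile(0.5)(x) AS a, quantile(0.9)(x) AS y FROM t;
    -- becomes: SELECT quantiles(0.5, 0.9)(x)[1] AS a, quantiles(0.5, 0.9)(x)[2] AS y FROM t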
@@ -41,38 +43,19 @@ public: || func->name == NameQuantileTDigestWeighted::name) argument = argument + "," + func->arguments->children.at(1)->getColumnName(); - auto it = arg_map_function.find(argument); - if (it != arg_map_function.end()) - { - it->second.push_back(func); - } - else - { - std::vector new_func_list; - new_func_list.push_back(func); - arg_map_function[argument] = new_func_list; - } + arg_map_function[argument].push_back(&ast); } }; using TypeToVisit = ASTFunction; std::unordered_map fuse_quantile; - void visit(ASTFunction & function, ASTPtr &) + + void visit(ASTFunction & function, ASTPtr & ast) { if (quantile_fuse_name_mapping.find(function.name) == quantile_fuse_name_mapping.end()) return; - auto it = fuse_quantile.find(function.name); - if (it != fuse_quantile.end()) - { - it->second.addFuncNode(&function); - } - else - { - FuseQuantileAggregatesData funcs{}; - funcs.addFuncNode(&function); - fuse_quantile[function.name] = funcs; - } + fuse_quantile[function.name].addFuncNode(ast); } }; diff --git a/src/Interpreters/TreeOptimizer.cpp b/src/Interpreters/TreeOptimizer.cpp index a23aeabb297..ce9fb1b81f8 100644 --- a/src/Interpreters/TreeOptimizer.cpp +++ b/src/Interpreters/TreeOptimizer.cpp @@ -593,40 +593,34 @@ void optimizeFunctionsToSubcolumns(ASTPtr & query, const StorageMetadataPtr & me /// rewrite to : SELECT quantiles(0.5, 0.9, 0.95)(x)[1], quantiles(0.5, 0.9, 0.95)(x)[2], quantiles(0.5, 0.9, 0.95)(x)[3] FROM ... void fuseCandidate(std::unordered_map & fuse_quantile) { - for (const auto & candidate : fuse_quantile) + for (auto & candidate : fuse_quantile) { String func_name = candidate.first; - GatherFunctionQuantileData::FuseQuantileAggregatesData args_to_functions = candidate.second; + GatherFunctionQuantileData::FuseQuantileAggregatesData & args_to_functions = candidate.second; - // Try to fuse multiply quantilexxx Function to one - for (const auto & it : args_to_functions.arg_map_function) + // Try to fuse multiply `quantile*` Function to plural + for (auto & it : args_to_functions.arg_map_function) { - std::vector functions = it.second; + std::vector & functions = it.second; size_t count = functions.size(); if (count > 1) { auto param_exp_list = std::make_shared(); - for (auto * func : functions) + for (auto ast : functions) { - const ASTs & parameters = func->parameters->as().children; + const ASTs & parameters = (*ast)->as()->parameters->as().children; assert(parameters.size() == 1); param_exp_list->children.push_back(parameters[0]); } - functions[0]->parameters = param_exp_list; - functions[0]->name = quantile_fuse_name_mapping.find(func_name)->second; - auto func_base = functions[0]->clone(); + auto func_base = makeASTFunction(quantile_fuse_name_mapping.find(func_name)->second, (*functions[0])->as()->arguments->children); + func_base->parameters = param_exp_list; size_t idx = 0; - for (auto * func : functions) + for (auto & ast : functions) { - func->name = "arrayElement"; - auto func_exp_list = std::make_shared(); - func_exp_list->children.push_back(func_base); - func_exp_list->children.push_back(std::make_shared(++idx)); - func->children.clear(); - func->parameters = nullptr; - func->arguments = func_exp_list; - func->children.push_back(func_exp_list); + auto ast_new = makeASTFunction("arrayElement", func_base, std::make_shared(UInt64(++idx))); + ast_new->setAlias((*ast)->tryGetAlias()); + *ast = ast_new; } } } diff --git a/tests/queries/0_stateless/01956_fuse_quantile_optimization.reference 
b/tests/queries/0_stateless/01956_fuse_quantile_optimization.reference index a0178759db7..c66efbeadfb 100644 --- a/tests/queries/0_stateless/01956_fuse_quantile_optimization.reference +++ b/tests/queries/0_stateless/01956_fuse_quantile_optimization.reference @@ -68,3 +68,23 @@ SELECT quantileBFloat16(0.4)(d + 1) FROM datetime 2016-04-02 17:23:12 2016-04-02 17:23:12 2016-04-02 17:23:12 +SELECT + quantiles(0.2, 0.3, 0.2)(d)[1] AS k, + quantiles(0.2, 0.3, 0.2)(d)[2] +FROM datetime +ORDER BY quantiles(0.2, 0.3, 0.2)(d)[3] ASC +0 4 7.2 7.6 +1 5 8.2 8.6 +SELECT + b, + quantiles(0.5, 0.9, 0.95)(x)[1] AS a, + quantiles(0.5, 0.9, 0.95)(x)[2] AS y, + quantiles(0.5, 0.9, 0.95)(x)[3] +FROM +( + SELECT + number AS x, + number % 2 AS b + FROM numbers(10) +) +GROUP BY b diff --git a/tests/queries/0_stateless/01956_fuse_quantile_optimization.sql b/tests/queries/0_stateless/01956_fuse_quantile_optimization.sql index 9fb72f6646b..1f476740aef 100644 --- a/tests/queries/0_stateless/01956_fuse_quantile_optimization.sql +++ b/tests/queries/0_stateless/01956_fuse_quantile_optimization.sql @@ -51,4 +51,8 @@ SELECT 'quantileBFloat16:'; EXPLAIN SYNTAX SELECT quantileBFloat16(0.2)(d), quantileBFloat16(0.3)(d), quantileBFloat16(0.4)(d + 1) FROM datetime; SELECT quantileBFloat16(0.2)(d), quantileBFloat16(0.3)(d), quantileBFloat16(0.4)(d + 1) FROM datetime; +EXPLAIN SYNTAX SELECT quantile(0.2)(d) as k, quantile(0.3)(d) FROM datetime order by quantile(0.2)(d); DROP TABLE datetime; + +SELECT b, quantile(0.5)(x) as a, quantile(0.9)(x) as y, quantile(0.95)(x) FROM (select number as x, number % 2 as b from numbers(10)) group by b; +EXPLAIN SYNTAX SELECT b, quantile(0.5)(x) as a, quantile(0.9)(x) as y, quantile(0.95)(x) FROM (select number as x, number % 2 as b from numbers(10)) group by b; From 2dfd5b14db6768e19d1ac1d5b45eac67c94678ad Mon Sep 17 00:00:00 2001 From: kssenii Date: Fri, 20 Aug 2021 20:59:57 +0300 Subject: [PATCH 05/80] alter --- src/Parsers/ASTAlterQuery.cpp | 19 +- src/Parsers/ASTAlterQuery.h | 10 +- src/Parsers/ParserAlterQuery.cpp | 1147 +++++++++++++++--------------- src/Parsers/ParserAlterQuery.h | 11 +- 4 files changed, 615 insertions(+), 572 deletions(-) diff --git a/src/Parsers/ASTAlterQuery.cpp b/src/Parsers/ASTAlterQuery.cpp index d4424a60ffc..0e617ca7c21 100644 --- a/src/Parsers/ASTAlterQuery.cpp +++ b/src/Parsers/ASTAlterQuery.cpp @@ -461,11 +461,22 @@ void ASTAlterQuery::formatQueryImpl(const FormatSettings & settings, FormatState frame.need_parens = false; std::string indent_str = settings.one_line ? "" : std::string(4u * frame.indent, ' '); + settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str; - if (is_live_view) - settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "ALTER LIVE VIEW " << (settings.hilite ? hilite_none : ""); - else - settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "ALTER TABLE " << (settings.hilite ? hilite_none : ""); + switch (alter_object) + { + case AlterObjectType::TABLE: + settings.ostr << "ALTER TABLE "; + break; + case AlterObjectType::DATABASE: + settings.ostr << "ALTER DATABASE "; + break; + case AlterObjectType::LIVE_VIEW: + settings.ostr << "ALTER LIVE VIEW "; + break; + } + + settings.ostr << (settings.hilite ? 
hilite_none : ""); if (!table.empty()) { diff --git a/src/Parsers/ASTAlterQuery.h b/src/Parsers/ASTAlterQuery.h index 0fd6d2805ea..dadba107ddc 100644 --- a/src/Parsers/ASTAlterQuery.h +++ b/src/Parsers/ASTAlterQuery.h @@ -208,7 +208,15 @@ protected: class ASTAlterQuery : public ASTQueryWithTableAndOutput, public ASTQueryWithOnCluster { public: - bool is_live_view{false}; /// true for ALTER LIVE VIEW + enum class AlterObjectType + { + TABLE, + DATABASE, + LIVE_VIEW + }; + + // bool is_live_view{false}; /// true for ALTER LIVE VIEW + AlterObjectType alter_object = AlterObjectType::TABLE; ASTExpressionList * command_list = nullptr; diff --git a/src/Parsers/ParserAlterQuery.cpp b/src/Parsers/ParserAlterQuery.cpp index b282f276762..cb1796a70b5 100644 --- a/src/Parsers/ParserAlterQuery.cpp +++ b/src/Parsers/ParserAlterQuery.cpp @@ -123,494 +123,40 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected ParserSelectWithUnionQuery select_p; ParserTTLExpressionList parser_ttl_list; - if (is_live_view) + switch (alter_object) { - if (s_refresh.ignore(pos, expected)) + case ASTAlterQuery::AlterObjectType::LIVE_VIEW: { - command->type = ASTAlterCommand::LIVE_VIEW_REFRESH; - } - else - return false; - } - else - { - if (s_add_column.ignore(pos, expected)) - { - if (s_if_not_exists.ignore(pos, expected)) - command->if_not_exists = true; - - if (!parser_col_decl.parse(pos, command->col_decl, expected)) - return false; - - if (s_first.ignore(pos, expected)) - command->first = true; - else if (s_after.ignore(pos, expected)) + if (s_refresh.ignore(pos, expected)) { - if (!parser_name.parse(pos, command->column, expected)) - return false; - } - - command->type = ASTAlterCommand::ADD_COLUMN; - } - else if (s_rename_column.ignore(pos, expected)) - { - if (s_if_exists.ignore(pos, expected)) - command->if_exists = true; - - if (!parser_name.parse(pos, command->column, expected)) - return false; - - if (!s_to.ignore(pos, expected)) - return false; - - if (!parser_name.parse(pos, command->rename_to, expected)) - return false; - - command->type = ASTAlterCommand::RENAME_COLUMN; - } - else if (s_drop_partition.ignore(pos, expected)) - { - if (!parser_partition.parse(pos, command->partition, expected)) - return false; - - command->type = ASTAlterCommand::DROP_PARTITION; - } - else if (s_drop_part.ignore(pos, expected)) - { - if (!parser_string_literal.parse(pos, command->partition, expected)) - return false; - - command->type = ASTAlterCommand::DROP_PARTITION; - command->part = true; - } - else if (s_drop_detached_partition.ignore(pos, expected)) - { - if (!parser_partition.parse(pos, command->partition, expected)) - return false; - - command->type = ASTAlterCommand::DROP_DETACHED_PARTITION; - } - else if (s_drop_detached_part.ignore(pos, expected)) - { - if (!parser_string_literal.parse(pos, command->partition, expected)) - return false; - - command->type = ASTAlterCommand::DROP_DETACHED_PARTITION; - command->part = true; - } - else if (s_drop_column.ignore(pos, expected)) - { - if (s_if_exists.ignore(pos, expected)) - command->if_exists = true; - - if (!parser_name.parse(pos, command->column, expected)) - return false; - - command->type = ASTAlterCommand::DROP_COLUMN; - command->detach = false; - } - else if (s_clear_column.ignore(pos, expected)) - { - if (s_if_exists.ignore(pos, expected)) - command->if_exists = true; - - if (!parser_name.parse(pos, command->column, expected)) - return false; - - command->type = ASTAlterCommand::DROP_COLUMN; - command->clear_column = true; - 
command->detach = false; - - if (s_in_partition.ignore(pos, expected)) - { - if (!parser_partition.parse(pos, command->partition, expected)) - return false; - } - } - else if (s_add_index.ignore(pos, expected)) - { - if (s_if_not_exists.ignore(pos, expected)) - command->if_not_exists = true; - - if (!parser_idx_decl.parse(pos, command->index_decl, expected)) - return false; - - if (s_first.ignore(pos, expected)) - command->first = true; - else if (s_after.ignore(pos, expected)) - { - if (!parser_name.parse(pos, command->index, expected)) - return false; - } - - command->type = ASTAlterCommand::ADD_INDEX; - } - else if (s_drop_index.ignore(pos, expected)) - { - if (s_if_exists.ignore(pos, expected)) - command->if_exists = true; - - if (!parser_name.parse(pos, command->index, expected)) - return false; - - command->type = ASTAlterCommand::DROP_INDEX; - command->detach = false; - } - else if (s_clear_index.ignore(pos, expected)) - { - if (s_if_exists.ignore(pos, expected)) - command->if_exists = true; - - if (!parser_name.parse(pos, command->index, expected)) - return false; - - command->type = ASTAlterCommand::DROP_INDEX; - command->clear_index = true; - command->detach = false; - - if (s_in_partition.ignore(pos, expected)) - { - if (!parser_partition.parse(pos, command->partition, expected)) - return false; - } - } - else if (s_materialize_index.ignore(pos, expected)) - { - if (s_if_exists.ignore(pos, expected)) - command->if_exists = true; - - if (!parser_name.parse(pos, command->index, expected)) - return false; - - command->type = ASTAlterCommand::MATERIALIZE_INDEX; - command->detach = false; - - if (s_in_partition.ignore(pos, expected)) - { - if (!parser_partition.parse(pos, command->partition, expected)) - return false; - } - } - else if (s_add_projection.ignore(pos, expected)) - { - if (s_if_not_exists.ignore(pos, expected)) - command->if_not_exists = true; - - if (!parser_projection_decl.parse(pos, command->projection_decl, expected)) - return false; - - if (s_first.ignore(pos, expected)) - command->first = true; - else if (s_after.ignore(pos, expected)) - { - if (!parser_name.parse(pos, command->projection, expected)) - return false; - } - - command->type = ASTAlterCommand::ADD_PROJECTION; - } - else if (s_drop_projection.ignore(pos, expected)) - { - if (s_if_exists.ignore(pos, expected)) - command->if_exists = true; - - if (!parser_name.parse(pos, command->projection, expected)) - return false; - - command->type = ASTAlterCommand::DROP_PROJECTION; - command->detach = false; - } - else if (s_clear_projection.ignore(pos, expected)) - { - if (s_if_exists.ignore(pos, expected)) - command->if_exists = true; - - if (!parser_name.parse(pos, command->projection, expected)) - return false; - - command->type = ASTAlterCommand::DROP_PROJECTION; - command->clear_projection = true; - command->detach = false; - - if (s_in_partition.ignore(pos, expected)) - { - if (!parser_partition.parse(pos, command->partition, expected)) - return false; - } - } - else if (s_materialize_projection.ignore(pos, expected)) - { - if (s_if_exists.ignore(pos, expected)) - command->if_exists = true; - - if (!parser_name.parse(pos, command->projection, expected)) - return false; - - command->type = ASTAlterCommand::MATERIALIZE_PROJECTION; - command->detach = false; - - if (s_in_partition.ignore(pos, expected)) - { - if (!parser_partition.parse(pos, command->partition, expected)) - return false; - } - } - else if (s_move_part.ignore(pos, expected)) - { - if (!parser_string_literal.parse(pos, command->partition, 
expected)) - return false; - - command->type = ASTAlterCommand::MOVE_PARTITION; - command->part = true; - - if (s_to_disk.ignore(pos)) - command->move_destination_type = DataDestinationType::DISK; - else if (s_to_volume.ignore(pos)) - command->move_destination_type = DataDestinationType::VOLUME; - else if (s_to_table.ignore(pos)) - { - if (!parseDatabaseAndTableName(pos, expected, command->to_database, command->to_table)) - return false; - command->move_destination_type = DataDestinationType::TABLE; - } - else if (s_to_shard.ignore(pos)) - { - command->move_destination_type = DataDestinationType::SHARD; + command->type = ASTAlterCommand::LIVE_VIEW_REFRESH; } else return false; - - if (command->move_destination_type != DataDestinationType::TABLE) - { - ASTPtr ast_space_name; - if (!parser_string_literal.parse(pos, ast_space_name, expected)) - return false; - - command->move_destination_name = ast_space_name->as().value.get(); - } + break; } - else if (s_move_partition.ignore(pos, expected)) + case ASTAlterQuery::AlterObjectType::DATABASE: { - if (!parser_partition.parse(pos, command->partition, expected)) - return false; - - command->type = ASTAlterCommand::MOVE_PARTITION; - - if (s_to_disk.ignore(pos)) - command->move_destination_type = DataDestinationType::DISK; - else if (s_to_volume.ignore(pos)) - command->move_destination_type = DataDestinationType::VOLUME; - else if (s_to_table.ignore(pos)) + if (s_modify_setting.ignore(pos, expected)) { - if (!parseDatabaseAndTableName(pos, expected, command->to_database, command->to_table)) + if (!parser_settings.parse(pos, command->settings_changes, expected)) return false; - command->move_destination_type = DataDestinationType::TABLE; + command->type = ASTAlterCommand::MODIFY_SETTING; } else return false; - - if (command->move_destination_type != DataDestinationType::TABLE) + break; + } + case ASTAlterQuery::AlterObjectType::TABLE: + { + if (s_add_column.ignore(pos, expected)) { - ASTPtr ast_space_name; - if (!parser_string_literal.parse(pos, ast_space_name, expected)) + if (s_if_not_exists.ignore(pos, expected)) + command->if_not_exists = true; + + if (!parser_col_decl.parse(pos, command->col_decl, expected)) return false; - command->move_destination_name = ast_space_name->as().value.get(); - } - } - else if (s_add_constraint.ignore(pos, expected)) - { - if (s_if_not_exists.ignore(pos, expected)) - command->if_not_exists = true; - - if (!parser_constraint_decl.parse(pos, command->constraint_decl, expected)) - return false; - - command->type = ASTAlterCommand::ADD_CONSTRAINT; - } - else if (s_drop_constraint.ignore(pos, expected)) - { - if (s_if_exists.ignore(pos, expected)) - command->if_exists = true; - - if (!parser_name.parse(pos, command->constraint, expected)) - return false; - - command->type = ASTAlterCommand::DROP_CONSTRAINT; - command->detach = false; - } - else if (s_detach_partition.ignore(pos, expected)) - { - if (!parser_partition.parse(pos, command->partition, expected)) - return false; - - command->type = ASTAlterCommand::DROP_PARTITION; - command->detach = true; - } - else if (s_detach_part.ignore(pos, expected)) - { - if (!parser_string_literal.parse(pos, command->partition, expected)) - return false; - - command->type = ASTAlterCommand::DROP_PARTITION; - command->part = true; - command->detach = true; - } - else if (s_attach_partition.ignore(pos, expected)) - { - if (!parser_partition.parse(pos, command->partition, expected)) - return false; - - if (s_from.ignore(pos)) - { - if (!parseDatabaseAndTableName(pos, expected, 
command->from_database, command->from_table)) - return false; - - command->replace = false; - command->type = ASTAlterCommand::REPLACE_PARTITION; - } - else - { - command->type = ASTAlterCommand::ATTACH_PARTITION; - } - } - else if (s_replace_partition.ignore(pos, expected)) - { - if (!parser_partition.parse(pos, command->partition, expected)) - return false; - - if (!s_from.ignore(pos, expected)) - return false; - - if (!parseDatabaseAndTableName(pos, expected, command->from_database, command->from_table)) - return false; - - command->replace = true; - command->type = ASTAlterCommand::REPLACE_PARTITION; - } - else if (s_attach_part.ignore(pos, expected)) - { - if (!parser_string_literal.parse(pos, command->partition, expected)) - return false; - - command->part = true; - command->type = ASTAlterCommand::ATTACH_PARTITION; - } - else if (s_fetch_partition.ignore(pos, expected)) - { - if (!parser_partition.parse(pos, command->partition, expected)) - return false; - - if (!s_from.ignore(pos, expected)) - return false; - - ASTPtr ast_from; - if (!parser_string_literal.parse(pos, ast_from, expected)) - return false; - - command->from = ast_from->as().value.get(); - command->type = ASTAlterCommand::FETCH_PARTITION; - } - else if (s_fetch_part.ignore(pos, expected)) - { - if (!parser_string_literal.parse(pos, command->partition, expected)) - return false; - - if (!s_from.ignore(pos, expected)) - return false; - - ASTPtr ast_from; - if (!parser_string_literal.parse(pos, ast_from, expected)) - return false; - command->from = ast_from->as().value.get(); - command->part = true; - command->type = ASTAlterCommand::FETCH_PARTITION; - } - else if (s_freeze.ignore(pos, expected)) - { - if (s_partition.ignore(pos, expected)) - { - if (!parser_partition.parse(pos, command->partition, expected)) - return false; - - command->type = ASTAlterCommand::FREEZE_PARTITION; - } - else - { - command->type = ASTAlterCommand::FREEZE_ALL; - } - - /// WITH NAME 'name' - place local backup to directory with specified name - if (s_with.ignore(pos, expected)) - { - if (!s_name.ignore(pos, expected)) - return false; - - ASTPtr ast_with_name; - if (!parser_string_literal.parse(pos, ast_with_name, expected)) - return false; - - command->with_name = ast_with_name->as().value.get(); - } - } - else if (s_unfreeze.ignore(pos, expected)) - { - if (s_partition.ignore(pos, expected)) - { - if (!parser_partition.parse(pos, command->partition, expected)) - return false; - - command->type = ASTAlterCommand::UNFREEZE_PARTITION; - } - else - { - command->type = ASTAlterCommand::UNFREEZE_ALL; - } - - /// WITH NAME 'name' - remove local backup to directory with specified name - if (s_with.ignore(pos, expected)) - { - if (!s_name.ignore(pos, expected)) - return false; - - ASTPtr ast_with_name; - if (!parser_string_literal.parse(pos, ast_with_name, expected)) - return false; - - command->with_name = ast_with_name->as().value.get(); - } - else - { - return false; - } - } - else if (s_modify_column.ignore(pos, expected)) - { - if (s_if_exists.ignore(pos, expected)) - command->if_exists = true; - - if (!parser_modify_col_decl.parse(pos, command->col_decl, expected)) - return false; - - if (s_remove.ignore(pos, expected)) - { - if (s_default.ignore(pos, expected)) - command->remove_property = "DEFAULT"; - else if (s_materialized.ignore(pos, expected)) - command->remove_property = "MATERIALIZED"; - else if (s_alias.ignore(pos, expected)) - command->remove_property = "ALIAS"; - else if (s_comment.ignore(pos, expected)) - command->remove_property = 
"COMMENT"; - else if (s_codec.ignore(pos, expected)) - command->remove_property = "CODEC"; - else if (s_ttl.ignore(pos, expected)) - command->remove_property = "TTL"; - else - return false; - } - else - { if (s_first.ignore(pos, expected)) command->first = true; else if (s_after.ignore(pos, expected)) @@ -618,111 +164,581 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected if (!parser_name.parse(pos, command->column, expected)) return false; } + + command->type = ASTAlterCommand::ADD_COLUMN; } - command->type = ASTAlterCommand::MODIFY_COLUMN; - } - else if (s_modify_order_by.ignore(pos, expected)) - { - if (!parser_exp_elem.parse(pos, command->order_by, expected)) - return false; + else if (s_rename_column.ignore(pos, expected)) + { + if (s_if_exists.ignore(pos, expected)) + command->if_exists = true; - command->type = ASTAlterCommand::MODIFY_ORDER_BY; - } - else if (s_modify_sample_by.ignore(pos, expected)) - { - if (!parser_exp_elem.parse(pos, command->sample_by, expected)) - return false; + if (!parser_name.parse(pos, command->column, expected)) + return false; - command->type = ASTAlterCommand::MODIFY_SAMPLE_BY; - } - else if (s_delete.ignore(pos, expected)) - { - if (s_in_partition.ignore(pos, expected)) + if (!s_to.ignore(pos, expected)) + return false; + + if (!parser_name.parse(pos, command->rename_to, expected)) + return false; + + command->type = ASTAlterCommand::RENAME_COLUMN; + } + else if (s_drop_partition.ignore(pos, expected)) { if (!parser_partition.parse(pos, command->partition, expected)) return false; + + command->type = ASTAlterCommand::DROP_PARTITION; } + else if (s_drop_part.ignore(pos, expected)) + { + if (!parser_string_literal.parse(pos, command->partition, expected)) + return false; - if (!s_where.ignore(pos, expected)) - return false; - - if (!parser_exp_elem.parse(pos, command->predicate, expected)) - return false; - - command->type = ASTAlterCommand::DELETE; - } - else if (s_update.ignore(pos, expected)) - { - if (!parser_assignment_list.parse(pos, command->update_assignments, expected)) - return false; - - if (s_in_partition.ignore(pos, expected)) + command->type = ASTAlterCommand::DROP_PARTITION; + command->part = true; + } + else if (s_drop_detached_partition.ignore(pos, expected)) { if (!parser_partition.parse(pos, command->partition, expected)) return false; + + command->type = ASTAlterCommand::DROP_DETACHED_PARTITION; } + else if (s_drop_detached_part.ignore(pos, expected)) + { + if (!parser_string_literal.parse(pos, command->partition, expected)) + return false; - if (!s_where.ignore(pos, expected)) - return false; + command->type = ASTAlterCommand::DROP_DETACHED_PARTITION; + command->part = true; + } + else if (s_drop_column.ignore(pos, expected)) + { + if (s_if_exists.ignore(pos, expected)) + command->if_exists = true; - if (!parser_exp_elem.parse(pos, command->predicate, expected)) - return false; + if (!parser_name.parse(pos, command->column, expected)) + return false; - command->type = ASTAlterCommand::UPDATE; - } - else if (s_comment_column.ignore(pos, expected)) - { - if (s_if_exists.ignore(pos, expected)) - command->if_exists = true; + command->type = ASTAlterCommand::DROP_COLUMN; + command->detach = false; + } + else if (s_clear_column.ignore(pos, expected)) + { + if (s_if_exists.ignore(pos, expected)) + command->if_exists = true; - if (!parser_name.parse(pos, command->column, expected)) - return false; + if (!parser_name.parse(pos, command->column, expected)) + return false; - if 
(!parser_string_literal.parse(pos, command->comment, expected)) - return false; + command->type = ASTAlterCommand::DROP_COLUMN; + command->clear_column = true; + command->detach = false; - command->type = ASTAlterCommand::COMMENT_COLUMN; - } - else if (s_modify_ttl.ignore(pos, expected)) - { - if (!parser_ttl_list.parse(pos, command->ttl, expected)) - return false; - command->type = ASTAlterCommand::MODIFY_TTL; - } - else if (s_remove_ttl.ignore(pos, expected)) - { - command->type = ASTAlterCommand::REMOVE_TTL; - } - else if (s_materialize_ttl.ignore(pos, expected)) - { - command->type = ASTAlterCommand::MATERIALIZE_TTL; + if (s_in_partition.ignore(pos, expected)) + { + if (!parser_partition.parse(pos, command->partition, expected)) + return false; + } + } + else if (s_add_index.ignore(pos, expected)) + { + if (s_if_not_exists.ignore(pos, expected)) + command->if_not_exists = true; - if (s_in_partition.ignore(pos, expected)) + if (!parser_idx_decl.parse(pos, command->index_decl, expected)) + return false; + + if (s_first.ignore(pos, expected)) + command->first = true; + else if (s_after.ignore(pos, expected)) + { + if (!parser_name.parse(pos, command->index, expected)) + return false; + } + + command->type = ASTAlterCommand::ADD_INDEX; + } + else if (s_drop_index.ignore(pos, expected)) + { + if (s_if_exists.ignore(pos, expected)) + command->if_exists = true; + + if (!parser_name.parse(pos, command->index, expected)) + return false; + + command->type = ASTAlterCommand::DROP_INDEX; + command->detach = false; + } + else if (s_clear_index.ignore(pos, expected)) + { + if (s_if_exists.ignore(pos, expected)) + command->if_exists = true; + + if (!parser_name.parse(pos, command->index, expected)) + return false; + + command->type = ASTAlterCommand::DROP_INDEX; + command->clear_index = true; + command->detach = false; + + if (s_in_partition.ignore(pos, expected)) + { + if (!parser_partition.parse(pos, command->partition, expected)) + return false; + } + } + else if (s_materialize_index.ignore(pos, expected)) + { + if (s_if_exists.ignore(pos, expected)) + command->if_exists = true; + + if (!parser_name.parse(pos, command->index, expected)) + return false; + + command->type = ASTAlterCommand::MATERIALIZE_INDEX; + command->detach = false; + + if (s_in_partition.ignore(pos, expected)) + { + if (!parser_partition.parse(pos, command->partition, expected)) + return false; + } + } + else if (s_add_projection.ignore(pos, expected)) + { + if (s_if_not_exists.ignore(pos, expected)) + command->if_not_exists = true; + + if (!parser_projection_decl.parse(pos, command->projection_decl, expected)) + return false; + + if (s_first.ignore(pos, expected)) + command->first = true; + else if (s_after.ignore(pos, expected)) + { + if (!parser_name.parse(pos, command->projection, expected)) + return false; + } + + command->type = ASTAlterCommand::ADD_PROJECTION; + } + else if (s_drop_projection.ignore(pos, expected)) + { + if (s_if_exists.ignore(pos, expected)) + command->if_exists = true; + + if (!parser_name.parse(pos, command->projection, expected)) + return false; + + command->type = ASTAlterCommand::DROP_PROJECTION; + command->detach = false; + } + else if (s_clear_projection.ignore(pos, expected)) + { + if (s_if_exists.ignore(pos, expected)) + command->if_exists = true; + + if (!parser_name.parse(pos, command->projection, expected)) + return false; + + command->type = ASTAlterCommand::DROP_PROJECTION; + command->clear_projection = true; + command->detach = false; + + if (s_in_partition.ignore(pos, expected)) + { + if 
(!parser_partition.parse(pos, command->partition, expected)) + return false; + } + } + else if (s_materialize_projection.ignore(pos, expected)) + { + if (s_if_exists.ignore(pos, expected)) + command->if_exists = true; + + if (!parser_name.parse(pos, command->projection, expected)) + return false; + + command->type = ASTAlterCommand::MATERIALIZE_PROJECTION; + command->detach = false; + + if (s_in_partition.ignore(pos, expected)) + { + if (!parser_partition.parse(pos, command->partition, expected)) + return false; + } + } + else if (s_move_part.ignore(pos, expected)) + { + if (!parser_string_literal.parse(pos, command->partition, expected)) + return false; + + command->type = ASTAlterCommand::MOVE_PARTITION; + command->part = true; + + if (s_to_disk.ignore(pos)) + command->move_destination_type = DataDestinationType::DISK; + else if (s_to_volume.ignore(pos)) + command->move_destination_type = DataDestinationType::VOLUME; + else if (s_to_table.ignore(pos)) + { + if (!parseDatabaseAndTableName(pos, expected, command->to_database, command->to_table)) + return false; + command->move_destination_type = DataDestinationType::TABLE; + } + else if (s_to_shard.ignore(pos)) + { + command->move_destination_type = DataDestinationType::SHARD; + } + else + return false; + + if (command->move_destination_type != DataDestinationType::TABLE) + { + ASTPtr ast_space_name; + if (!parser_string_literal.parse(pos, ast_space_name, expected)) + return false; + + command->move_destination_name = ast_space_name->as().value.get(); + } + } + else if (s_move_partition.ignore(pos, expected)) { if (!parser_partition.parse(pos, command->partition, expected)) return false; + + command->type = ASTAlterCommand::MOVE_PARTITION; + + if (s_to_disk.ignore(pos)) + command->move_destination_type = DataDestinationType::DISK; + else if (s_to_volume.ignore(pos)) + command->move_destination_type = DataDestinationType::VOLUME; + else if (s_to_table.ignore(pos)) + { + if (!parseDatabaseAndTableName(pos, expected, command->to_database, command->to_table)) + return false; + command->move_destination_type = DataDestinationType::TABLE; + } + else + return false; + + if (command->move_destination_type != DataDestinationType::TABLE) + { + ASTPtr ast_space_name; + if (!parser_string_literal.parse(pos, ast_space_name, expected)) + return false; + + command->move_destination_name = ast_space_name->as().value.get(); + } } - } - else if (s_modify_setting.ignore(pos, expected)) - { - if (!parser_settings.parse(pos, command->settings_changes, expected)) + else if (s_add_constraint.ignore(pos, expected)) + { + if (s_if_not_exists.ignore(pos, expected)) + command->if_not_exists = true; + + if (!parser_constraint_decl.parse(pos, command->constraint_decl, expected)) + return false; + + command->type = ASTAlterCommand::ADD_CONSTRAINT; + } + else if (s_drop_constraint.ignore(pos, expected)) + { + if (s_if_exists.ignore(pos, expected)) + command->if_exists = true; + + if (!parser_name.parse(pos, command->constraint, expected)) + return false; + + command->type = ASTAlterCommand::DROP_CONSTRAINT; + command->detach = false; + } + else if (s_detach_partition.ignore(pos, expected)) + { + if (!parser_partition.parse(pos, command->partition, expected)) + return false; + + command->type = ASTAlterCommand::DROP_PARTITION; + command->detach = true; + } + else if (s_detach_part.ignore(pos, expected)) + { + if (!parser_string_literal.parse(pos, command->partition, expected)) + return false; + + command->type = ASTAlterCommand::DROP_PARTITION; + command->part = true; + 
command->detach = true; + } + else if (s_attach_partition.ignore(pos, expected)) + { + if (!parser_partition.parse(pos, command->partition, expected)) + return false; + + if (s_from.ignore(pos)) + { + if (!parseDatabaseAndTableName(pos, expected, command->from_database, command->from_table)) + return false; + + command->replace = false; + command->type = ASTAlterCommand::REPLACE_PARTITION; + } + else + { + command->type = ASTAlterCommand::ATTACH_PARTITION; + } + } + else if (s_replace_partition.ignore(pos, expected)) + { + if (!parser_partition.parse(pos, command->partition, expected)) + return false; + + if (!s_from.ignore(pos, expected)) + return false; + + if (!parseDatabaseAndTableName(pos, expected, command->from_database, command->from_table)) + return false; + + command->replace = true; + command->type = ASTAlterCommand::REPLACE_PARTITION; + } + else if (s_attach_part.ignore(pos, expected)) + { + if (!parser_string_literal.parse(pos, command->partition, expected)) + return false; + + command->part = true; + command->type = ASTAlterCommand::ATTACH_PARTITION; + } + else if (s_fetch_partition.ignore(pos, expected)) + { + if (!parser_partition.parse(pos, command->partition, expected)) + return false; + + if (!s_from.ignore(pos, expected)) + return false; + + ASTPtr ast_from; + if (!parser_string_literal.parse(pos, ast_from, expected)) + return false; + + command->from = ast_from->as().value.get(); + command->type = ASTAlterCommand::FETCH_PARTITION; + } + else if (s_fetch_part.ignore(pos, expected)) + { + if (!parser_string_literal.parse(pos, command->partition, expected)) + return false; + + if (!s_from.ignore(pos, expected)) + return false; + + ASTPtr ast_from; + if (!parser_string_literal.parse(pos, ast_from, expected)) + return false; + command->from = ast_from->as().value.get(); + command->part = true; + command->type = ASTAlterCommand::FETCH_PARTITION; + } + else if (s_freeze.ignore(pos, expected)) + { + if (s_partition.ignore(pos, expected)) + { + if (!parser_partition.parse(pos, command->partition, expected)) + return false; + + command->type = ASTAlterCommand::FREEZE_PARTITION; + } + else + { + command->type = ASTAlterCommand::FREEZE_ALL; + } + + /// WITH NAME 'name' - place local backup to directory with specified name + if (s_with.ignore(pos, expected)) + { + if (!s_name.ignore(pos, expected)) + return false; + + ASTPtr ast_with_name; + if (!parser_string_literal.parse(pos, ast_with_name, expected)) + return false; + + command->with_name = ast_with_name->as().value.get(); + } + } + else if (s_unfreeze.ignore(pos, expected)) + { + if (s_partition.ignore(pos, expected)) + { + if (!parser_partition.parse(pos, command->partition, expected)) + return false; + + command->type = ASTAlterCommand::UNFREEZE_PARTITION; + } + else + { + command->type = ASTAlterCommand::UNFREEZE_ALL; + } + + /// WITH NAME 'name' - remove local backup to directory with specified name + if (s_with.ignore(pos, expected)) + { + if (!s_name.ignore(pos, expected)) + return false; + + ASTPtr ast_with_name; + if (!parser_string_literal.parse(pos, ast_with_name, expected)) + return false; + + command->with_name = ast_with_name->as().value.get(); + } + else + { + return false; + } + } + else if (s_modify_column.ignore(pos, expected)) + { + if (s_if_exists.ignore(pos, expected)) + command->if_exists = true; + + if (!parser_modify_col_decl.parse(pos, command->col_decl, expected)) + return false; + + if (s_remove.ignore(pos, expected)) + { + if (s_default.ignore(pos, expected)) + command->remove_property = "DEFAULT"; + 
else if (s_materialized.ignore(pos, expected)) + command->remove_property = "MATERIALIZED"; + else if (s_alias.ignore(pos, expected)) + command->remove_property = "ALIAS"; + else if (s_comment.ignore(pos, expected)) + command->remove_property = "COMMENT"; + else if (s_codec.ignore(pos, expected)) + command->remove_property = "CODEC"; + else if (s_ttl.ignore(pos, expected)) + command->remove_property = "TTL"; + else + return false; + } + else + { + if (s_first.ignore(pos, expected)) + command->first = true; + else if (s_after.ignore(pos, expected)) + { + if (!parser_name.parse(pos, command->column, expected)) + return false; + } + } + command->type = ASTAlterCommand::MODIFY_COLUMN; + } + else if (s_modify_order_by.ignore(pos, expected)) + { + if (!parser_exp_elem.parse(pos, command->order_by, expected)) + return false; + + command->type = ASTAlterCommand::MODIFY_ORDER_BY; + } + else if (s_modify_sample_by.ignore(pos, expected)) + { + if (!parser_exp_elem.parse(pos, command->sample_by, expected)) + return false; + + command->type = ASTAlterCommand::MODIFY_SAMPLE_BY; + } + else if (s_delete.ignore(pos, expected)) + { + if (s_in_partition.ignore(pos, expected)) + { + if (!parser_partition.parse(pos, command->partition, expected)) + return false; + } + + if (!s_where.ignore(pos, expected)) + return false; + + if (!parser_exp_elem.parse(pos, command->predicate, expected)) + return false; + + command->type = ASTAlterCommand::DELETE; + } + else if (s_update.ignore(pos, expected)) + { + if (!parser_assignment_list.parse(pos, command->update_assignments, expected)) + return false; + + if (s_in_partition.ignore(pos, expected)) + { + if (!parser_partition.parse(pos, command->partition, expected)) + return false; + } + + if (!s_where.ignore(pos, expected)) + return false; + + if (!parser_exp_elem.parse(pos, command->predicate, expected)) + return false; + + command->type = ASTAlterCommand::UPDATE; + } + else if (s_comment_column.ignore(pos, expected)) + { + if (s_if_exists.ignore(pos, expected)) + command->if_exists = true; + + if (!parser_name.parse(pos, command->column, expected)) + return false; + + if (!parser_string_literal.parse(pos, command->comment, expected)) + return false; + + command->type = ASTAlterCommand::COMMENT_COLUMN; + } + else if (s_modify_ttl.ignore(pos, expected)) + { + if (!parser_ttl_list.parse(pos, command->ttl, expected)) + return false; + command->type = ASTAlterCommand::MODIFY_TTL; + } + else if (s_remove_ttl.ignore(pos, expected)) + { + command->type = ASTAlterCommand::REMOVE_TTL; + } + else if (s_materialize_ttl.ignore(pos, expected)) + { + command->type = ASTAlterCommand::MATERIALIZE_TTL; + + if (s_in_partition.ignore(pos, expected)) + { + if (!parser_partition.parse(pos, command->partition, expected)) + return false; + } + } + else if (s_modify_setting.ignore(pos, expected)) + { + if (!parser_settings.parse(pos, command->settings_changes, expected)) + return false; + command->type = ASTAlterCommand::MODIFY_SETTING; + } + else if (s_reset_setting.ignore(pos, expected)) + { + if (!parser_reset_setting.parse(pos, command->settings_resets, expected)) + return false; + command->type = ASTAlterCommand::RESET_SETTING; + } + else if (s_modify_query.ignore(pos, expected)) + { + if (!select_p.parse(pos, command->select, expected)) + return false; + command->type = ASTAlterCommand::MODIFY_QUERY; + } + else return false; - command->type = ASTAlterCommand::MODIFY_SETTING; } - else if (s_reset_setting.ignore(pos, expected)) - { - if (!parser_reset_setting.parse(pos, 
command->settings_resets, expected)) - return false; - command->type = ASTAlterCommand::RESET_SETTING; - } - else if (s_modify_query.ignore(pos, expected)) - { - if (!select_p.parse(pos, command->select, expected)) - return false; - command->type = ASTAlterCommand::MODIFY_QUERY; - } - else - return false; } if (command->col_decl) @@ -770,7 +786,7 @@ bool ParserAlterCommandList::parseImpl(Pos & pos, ASTPtr & node, Expected & expe node = command_list; ParserToken s_comma(TokenType::Comma); - ParserAlterCommand p_command(is_live_view); + ParserAlterCommand p_command(alter_object); do { @@ -793,19 +809,24 @@ bool ParserAlterQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) ParserKeyword s_alter_table("ALTER TABLE"); ParserKeyword s_alter_live_view("ALTER LIVE VIEW"); + ParserKeyword s_alter_database("ALTER DATABASE"); - bool is_live_view = false; + ASTAlterQuery::AlterObjectType alter_object_type; - if (!s_alter_table.ignore(pos, expected)) + if (s_alter_table.ignore(pos, expected)) { - if (!s_alter_live_view.ignore(pos, expected)) - return false; - else - is_live_view = true; + alter_object_type = ASTAlterQuery::AlterObjectType::TABLE; } - - if (is_live_view) - query->is_live_view = true; + else if (s_alter_live_view.ignore(pos, expected)) + { + alter_object_type = ASTAlterQuery::AlterObjectType::LIVE_VIEW; + } + else if (s_alter_database.ignore(pos, expected)) + { + alter_object_type = ASTAlterQuery::AlterObjectType::DATABASE; + } + else + return false; if (!parseDatabaseAndTableName(pos, expected, query->database, query->table)) return false; @@ -818,7 +839,7 @@ bool ParserAlterQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) } query->cluster = cluster_str; - ParserAlterCommandList p_command_list(is_live_view); + ParserAlterCommandList p_command_list(alter_object_type); ASTPtr command_list; if (!p_command_list.parse(pos, command_list, expected)) return false; diff --git a/src/Parsers/ParserAlterQuery.h b/src/Parsers/ParserAlterQuery.h index 2e54c4ddbaf..de9d752d1a3 100644 --- a/src/Parsers/ParserAlterQuery.h +++ b/src/Parsers/ParserAlterQuery.h @@ -2,6 +2,7 @@ #include #include +#include namespace DB { @@ -45,9 +46,10 @@ protected: bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; public: - bool is_live_view; + ASTAlterQuery::AlterObjectType alter_object; - ParserAlterCommandList(bool is_live_view_ = false) : is_live_view(is_live_view_) {} + ParserAlterCommandList(ASTAlterQuery::AlterObjectType alter_object_ = ASTAlterQuery::AlterObjectType::TABLE) + : alter_object(alter_object_) {} }; @@ -58,9 +60,10 @@ protected: bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; public: - bool is_live_view; + ASTAlterQuery::AlterObjectType alter_object; - ParserAlterCommand(bool is_live_view_ = false) : is_live_view(is_live_view_) {} + ParserAlterCommand(ASTAlterQuery::AlterObjectType alter_object_ = ASTAlterQuery::AlterObjectType::TABLE) + : alter_object(alter_object_) {} }; From 4cd62227cfa25044e61e22b1e631f46621492157 Mon Sep 17 00:00:00 2001 From: kssenii Date: Fri, 27 Aug 2021 09:30:21 +0300 Subject: [PATCH 06/80] Almost done --- src/Access/AccessType.h | 2 + src/Common/ErrorCodes.cpp | 9 +- src/Core/PostgreSQL/Connection.cpp | 2 +- src/Core/PostgreSQL/Utils.cpp | 9 + src/Core/PostgreSQL/Utils.h | 4 + src/Databases/DatabaseAtomic.cpp | 66 ++++++- src/Databases/DatabaseAtomic.h | 7 +- src/Databases/DatabaseFactory.cpp | 23 ++- src/Databases/DatabaseReplicated.cpp | 5 +- src/Databases/DatabaseReplicated.h | 2 +- 
src/Databases/IDatabase.h | 17 ++ .../MySQL/DatabaseMaterializedMySQL.cpp | 8 +- .../MySQL/DatabaseMaterializedMySQL.h | 2 +- .../DatabaseMaterializedPostgreSQL.cpp | 133 ++++++++++++- .../DatabaseMaterializedPostgreSQL.h | 12 +- .../fetchPostgreSQLTableStructure.cpp | 8 + src/Interpreters/DatabaseCatalog.cpp | 2 +- src/Interpreters/InterpreterAlterQuery.cpp | 50 ++++- src/Interpreters/InterpreterAlterQuery.h | 5 + src/Parsers/ASTAlterQuery.cpp | 7 + src/Parsers/ASTAlterQuery.h | 7 +- src/Parsers/ParserAlterQuery.cpp | 31 +++- src/Parsers/parseDatabaseAndTableName.cpp | 16 ++ src/Parsers/parseDatabaseAndTableName.h | 2 + src/Storages/AlterCommands.cpp | 26 +++ src/Storages/AlterCommands.h | 5 + .../MaterializedPostgreSQLConsumer.cpp | 96 +++++++--- .../MaterializedPostgreSQLConsumer.h | 19 +- .../PostgreSQLReplicationHandler.cpp | 174 ++++++++++++++---- .../PostgreSQL/PostgreSQLReplicationHandler.h | 24 +-- .../StorageMaterializedPostgreSQL.cpp | 22 ++- .../StorageMaterializedPostgreSQL.h | 15 +- src/Storages/StorageInMemoryMetadata.h | 2 +- 33 files changed, 673 insertions(+), 139 deletions(-) diff --git a/src/Access/AccessType.h b/src/Access/AccessType.h index b1b49a6ba75..57342ee5503 100644 --- a/src/Access/AccessType.h +++ b/src/Access/AccessType.h @@ -71,6 +71,8 @@ enum class AccessType M(ALTER_FETCH_PARTITION, "ALTER FETCH PART, FETCH PARTITION", TABLE, ALTER_TABLE) \ M(ALTER_FREEZE_PARTITION, "FREEZE PARTITION, UNFREEZE", TABLE, ALTER_TABLE) \ \ + M(ALTER_DATABASE_SETTINGS, "ALTER DATABASE SETTING, ALTER MODIFY DATABASE SETTING, MODIFY DATABASE SETTING", TABLE, ALTER_TABLE) /* allows to execute ALTER MODIFY SETTING */\ + \ M(ALTER_TABLE, "", GROUP, ALTER) \ \ M(ALTER_VIEW_REFRESH, "ALTER LIVE VIEW REFRESH, REFRESH VIEW", VIEW, ALTER_VIEW) \ diff --git a/src/Common/ErrorCodes.cpp b/src/Common/ErrorCodes.cpp index 2110a8c7d6d..36d0fafdb4c 100644 --- a/src/Common/ErrorCodes.cpp +++ b/src/Common/ErrorCodes.cpp @@ -577,12 +577,9 @@ M(606, BACKUP_IS_EMPTY) \ M(607, BACKUP_ELEMENT_DUPLICATE) \ M(608, CANNOT_RESTORE_TABLE) \ - M(609, FUNCTION_ALREADY_EXISTS) \ - M(610, CANNOT_DROP_SYSTEM_FUNCTION) \ - M(611, CANNOT_CREATE_RECURSIVE_FUNCTION) \ - M(612, OBJECT_ALREADY_STORED_ON_DISK) \ - M(613, OBJECT_WAS_NOT_STORED_ON_DISK) \ - M(614, POSTGRESQL_CONNECTION_FAILURE) \ + M(609, POSTGRESQL_CONNECTION_FAILURE) \ + M(610, POSTGRESQL_REPLICATION_INTERNAL_ERROR) \ + M(611, QUERY_NOT_ALLOWED) \ \ M(999, KEEPER_EXCEPTION) \ M(1000, POCO_EXCEPTION) \ diff --git a/src/Core/PostgreSQL/Connection.cpp b/src/Core/PostgreSQL/Connection.cpp index e5c61c19963..2cb52fcff81 100644 --- a/src/Core/PostgreSQL/Connection.cpp +++ b/src/Core/PostgreSQL/Connection.cpp @@ -3,6 +3,7 @@ #if USE_LIBPQXX #include +#include namespace postgres { @@ -42,7 +43,6 @@ void Connection::execWithRetry(const std::function pqxx::connection & Connection::getRef() { connect(); - assert(connection != nullptr); return *connection; } diff --git a/src/Core/PostgreSQL/Utils.cpp b/src/Core/PostgreSQL/Utils.cpp index ebfdacd0fea..98cd706be69 100644 --- a/src/Core/PostgreSQL/Utils.cpp +++ b/src/Core/PostgreSQL/Utils.cpp @@ -19,6 +19,15 @@ ConnectionInfo formatConnectionString(String dbname, String host, UInt16 port, S return std::make_pair(out.str(), host + ':' + DB::toString(port)); } +String formatNameForLogs(const String & postgres_database_name, const String & postgres_table_name) +{ + if (postgres_database_name.empty()) + return postgres_table_name; + if (postgres_table_name.empty()) + return postgres_database_name; + return 
fmt::format("{}.{}", postgres_database_name, postgres_table_name); +} + } #endif diff --git a/src/Core/PostgreSQL/Utils.h b/src/Core/PostgreSQL/Utils.h index 4a58fcffb9a..59b44f8f5e1 100644 --- a/src/Core/PostgreSQL/Utils.h +++ b/src/Core/PostgreSQL/Utils.h @@ -19,7 +19,11 @@ namespace pqxx namespace postgres { + ConnectionInfo formatConnectionString(String dbname, String host, UInt16 port, String user, String password); + +String formatNameForLogs(const String & postgres_database_name, const String & postgres_table_name); + } #endif diff --git a/src/Databases/DatabaseAtomic.cpp b/src/Databases/DatabaseAtomic.cpp index b55277594be..8ce17198d3d 100644 --- a/src/Databases/DatabaseAtomic.cpp +++ b/src/Databases/DatabaseAtomic.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include @@ -36,19 +37,20 @@ public: UUID uuid() const override { return table()->getStorageID().uuid; } }; -DatabaseAtomic::DatabaseAtomic(String name_, String metadata_path_, UUID uuid, const String & logger_name, ContextPtr context_) +DatabaseAtomic::DatabaseAtomic(String name_, String metadata_path_, UUID uuid, const String & logger_name, ContextPtr context_, ASTPtr storage_def_) : DatabaseOrdinary(name_, std::move(metadata_path_), "store/", logger_name, context_) , path_to_table_symlinks(fs::path(getContext()->getPath()) / "data" / escapeForFileName(name_) / "") , path_to_metadata_symlink(fs::path(getContext()->getPath()) / "metadata" / escapeForFileName(name_)) , db_uuid(uuid) + , storage_def(storage_def_) { assert(db_uuid != UUIDHelpers::Nil); fs::create_directories(path_to_table_symlinks); tryCreateMetadataSymlink(); } -DatabaseAtomic::DatabaseAtomic(String name_, String metadata_path_, UUID uuid, ContextPtr context_) - : DatabaseAtomic(name_, std::move(metadata_path_), uuid, "DatabaseAtomic (" + name_ + ")", context_) +DatabaseAtomic::DatabaseAtomic(String name_, String metadata_path_, UUID uuid, ContextPtr context_, ASTPtr storage_def_) + : DatabaseAtomic(name_, std::move(metadata_path_), uuid, "DatabaseAtomic (" + name_ + ")", context_, storage_def_) { } @@ -566,4 +568,62 @@ void DatabaseAtomic::checkDetachedTableNotInUse(const UUID & uuid) assertDetachedTableNotInUse(uuid); } +void DatabaseAtomic::modifySettings(const SettingsChanges & settings_changes, ContextPtr local_context) +{ + applySettings(settings_changes, local_context); + + ASTCreateQuery create; + create.attach = true; + create.database = "_"; + create.uuid = getUUID(); + create.if_not_exists = false; + create.storage = assert_cast(storage_def.get()); + auto * ast_set_query = create.storage->settings; + + if (ast_set_query) + { + auto & previous_settings = ast_set_query->changes; + for (const auto & change : settings_changes) + { + auto it = std::find_if(previous_settings.begin(), previous_settings.end(), + [&](const auto & prev){ return prev.name == change.name; }); + if (it != previous_settings.end()) + it->value = change.value; + else + previous_settings.push_back(change); + } + } + else + { + auto settings = std::make_shared(); + settings->is_standalone = false; + settings->changes = settings_changes; + create.storage->set(create.storage->settings, settings->clone()); + } + + create.attach = true; + create.if_not_exists = false; + + WriteBufferFromOwnString statement_buf; + formatAST(create, statement_buf, false); + writeChar('\n', statement_buf); + String statement = statement_buf.str(); + + String database_name_escaped = escapeForFileName(database_name); + fs::path metadata_root_path = 
fs::canonical(local_context->getGlobalContext()->getPath()); + fs::path metadata_file_tmp_path = fs::path(metadata_root_path) / "metadata" / (database_name_escaped + ".sql.tmp"); + fs::path metadata_file_path = fs::path(metadata_root_path) / "metadata" / (database_name_escaped + ".sql"); + + /// Exclusive flag guarantees, that database is not created right now in another thread. + WriteBufferFromFile out(metadata_file_tmp_path, statement.size(), O_WRONLY | O_CREAT | O_EXCL); + writeString(statement, out); + + out.next(); + if (getContext()->getSettingsRef().fsync_metadata) + out.sync(); + out.close(); + + fs::rename(metadata_file_tmp_path, metadata_file_path); +} + } diff --git a/src/Databases/DatabaseAtomic.h b/src/Databases/DatabaseAtomic.h index 21e841841bd..0b00a4eb43a 100644 --- a/src/Databases/DatabaseAtomic.h +++ b/src/Databases/DatabaseAtomic.h @@ -19,8 +19,8 @@ namespace DB class DatabaseAtomic : public DatabaseOrdinary { public: - DatabaseAtomic(String name_, String metadata_path_, UUID uuid, const String & logger_name, ContextPtr context_); - DatabaseAtomic(String name_, String metadata_path_, UUID uuid, ContextPtr context_); + DatabaseAtomic(String name_, String metadata_path_, UUID uuid, const String & logger_name, ContextPtr context_, ASTPtr storage_def_); + DatabaseAtomic(String name_, String metadata_path_, UUID uuid, ContextPtr context_, ASTPtr storage_def_); String getEngineName() const override { return "Atomic"; } UUID getUUID() const override { return db_uuid; } @@ -61,6 +61,8 @@ public: void checkDetachedTableNotInUse(const UUID & uuid) override; void setDetachedTableNotInUseForce(const UUID & uuid); + void modifySettings(const SettingsChanges & settings_changes, ContextPtr local_context) override; + protected: void commitAlterTable(const StorageID & table_id, const String & table_metadata_tmp_path, const String & table_metadata_path, const String & statement, ContextPtr query_context) override; void commitCreateTable(const ASTCreateQuery & query, const StoragePtr & table, @@ -80,6 +82,7 @@ protected: String path_to_table_symlinks; String path_to_metadata_symlink; const UUID db_uuid; + ASTPtr storage_def; }; } diff --git a/src/Databases/DatabaseFactory.cpp b/src/Databases/DatabaseFactory.cpp index 75a3b9c9e1e..cff71a0e7fd 100644 --- a/src/Databases/DatabaseFactory.cpp +++ b/src/Databases/DatabaseFactory.cpp @@ -103,13 +103,20 @@ DatabasePtr DatabaseFactory::getImpl(const ASTCreateQuery & create, const String const String & engine_name = engine_define->engine->name; const UUID & uuid = create.uuid; + static const std::unordered_set database_engines{"Ordinary", "Atomic", "Memory", + "Dictionary", "Lazy", "Replicated", "MySQL", "MaterializeMySQL", "MaterializedMySQL", + "Lazy", "Replicated", "PostgreSQL", "MaterializedPostgreSQL", "SQLite"}; + + if (!database_engines.contains(engine_name)) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Database engine name `{}` does not exist", engine_name); + static const std::unordered_set engines_with_arguments{"MySQL", "MaterializeMySQL", "MaterializedMySQL", "Lazy", "Replicated", "PostgreSQL", "MaterializedPostgreSQL", "SQLite"}; bool engine_may_have_arguments = engines_with_arguments.contains(engine_name); if (engine_define->engine->arguments && !engine_may_have_arguments) - throw Exception("Database engine " + engine_name + " cannot have arguments", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Database engine `{}` cannot have arguments", engine_name); bool has_unexpected_element = 
engine_define->engine->parameters || engine_define->partition_by || engine_define->primary_key || engine_define->order_by || @@ -117,13 +124,13 @@ DatabasePtr DatabaseFactory::getImpl(const ASTCreateQuery & create, const String bool may_have_settings = endsWith(engine_name, "MySQL") || engine_name == "Replicated" || engine_name == "MaterializedPostgreSQL"; if (has_unexpected_element || (!may_have_settings && engine_define->settings)) - throw Exception("Database engine " + engine_name + " cannot have parameters, primary_key, order_by, sample_by, settings", - ErrorCodes::UNKNOWN_ELEMENT_IN_AST); + throw Exception(ErrorCodes::UNKNOWN_ELEMENT_IN_AST, + "Database engine `{}` cannot have parameters, primary_key, order_by, sample_by, settings", engine_name); if (engine_name == "Ordinary") return std::make_shared(database_name, metadata_path, context); else if (engine_name == "Atomic") - return std::make_shared(database_name, metadata_path, uuid, context); + return std::make_shared(database_name, metadata_path, uuid, context, engine_define->clone()); else if (engine_name == "Memory") return std::make_shared(database_name, context); else if (engine_name == "Dictionary") @@ -177,11 +184,11 @@ DatabasePtr DatabaseFactory::getImpl(const ASTCreateQuery & create, const String if (create.uuid == UUIDHelpers::Nil) return std::make_shared>( context, database_name, metadata_path, uuid, mysql_database_name, std::move(mysql_pool), std::move(client) - , std::move(materialize_mode_settings)); + , std::move(materialize_mode_settings), engine_define->clone()); else return std::make_shared>( context, database_name, metadata_path, uuid, mysql_database_name, std::move(mysql_pool), std::move(client) - , std::move(materialize_mode_settings)); + , std::move(materialize_mode_settings), engine_define->clone()); } catch (...) 
{ @@ -227,7 +234,7 @@ DatabasePtr DatabaseFactory::getImpl(const ASTCreateQuery & create, const String return std::make_shared(database_name, metadata_path, uuid, zookeeper_path, shard_name, replica_name, - std::move(database_replicated_settings), context); + std::move(database_replicated_settings), context, engine_define->clone()); } #if USE_LIBPQXX @@ -304,7 +311,7 @@ DatabasePtr DatabaseFactory::getImpl(const ASTCreateQuery & create, const String postgresql_replica_settings->loadFromQuery(*engine_define); return std::make_shared( - context, metadata_path, uuid, engine_define, create.attach, + context, metadata_path, uuid, engine_define->clone(), create.attach, database_name, postgres_database_name, connection_info, std::move(postgresql_replica_settings)); } diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp index 8e8fb4e2d6d..d8835160151 100644 --- a/src/Databases/DatabaseReplicated.cpp +++ b/src/Databases/DatabaseReplicated.cpp @@ -67,8 +67,9 @@ DatabaseReplicated::DatabaseReplicated( const String & shard_name_, const String & replica_name_, DatabaseReplicatedSettings db_settings_, - ContextPtr context_) - : DatabaseAtomic(name_, metadata_path_, uuid, "DatabaseReplicated (" + name_ + ")", context_) + ContextPtr context_, + ASTPtr storage_def_) + : DatabaseAtomic(name_, metadata_path_, uuid, "DatabaseReplicated (" + name_ + ")", context_, storage_def_) , zookeeper_path(zookeeper_path_) , shard_name(shard_name_) , replica_name(replica_name_) diff --git a/src/Databases/DatabaseReplicated.h b/src/Databases/DatabaseReplicated.h index 41b1bf13e5f..997262325f6 100644 --- a/src/Databases/DatabaseReplicated.h +++ b/src/Databases/DatabaseReplicated.h @@ -24,7 +24,7 @@ public: DatabaseReplicated(const String & name_, const String & metadata_path_, UUID uuid, const String & zookeeper_path_, const String & shard_name_, const String & replica_name_, DatabaseReplicatedSettings db_settings_, - ContextPtr context); + ContextPtr context, ASTPtr storage_def_); ~DatabaseReplicated() override; diff --git a/src/Databases/IDatabase.h b/src/Databases/IDatabase.h index 387c6882eab..f3f801e620b 100644 --- a/src/Databases/IDatabase.h +++ b/src/Databases/IDatabase.h @@ -24,6 +24,8 @@ struct IndicesDescription; struct StorageInMemoryMetadata; struct StorageID; class ASTCreateQuery; +class AlterCommands; +class SettingsChanges; using DictionariesWithID = std::vector>; namespace ErrorCodes @@ -272,6 +274,21 @@ public: /// Delete data and metadata stored inside the database, if exists. 
virtual void drop(ContextPtr /*context*/) {} + virtual void checkAlterIsPossible(const AlterCommands & /* commands */, ContextPtr /* context */) const + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Alter is not supported by database engine {}", getEngineName()); + } + + virtual void modifySettings(const SettingsChanges &, ContextPtr) + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Database engine {} does not support settings", getEngineName()); + } + + virtual void applySettings(const SettingsChanges &, ContextPtr) + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Database engine {} does not support settings", getEngineName()); + } + virtual ~IDatabase() = default; protected: diff --git a/src/Databases/MySQL/DatabaseMaterializedMySQL.cpp b/src/Databases/MySQL/DatabaseMaterializedMySQL.cpp index ba9b30425dd..644e89894c4 100644 --- a/src/Databases/MySQL/DatabaseMaterializedMySQL.cpp +++ b/src/Databases/MySQL/DatabaseMaterializedMySQL.cpp @@ -37,7 +37,8 @@ DatabaseMaterializedMySQL::DatabaseMaterializedMySQL( const String & mysql_database_name_, mysqlxx::Pool && pool_, MySQLClient && client_, - std::unique_ptr settings_) + std::unique_ptr settings_, + ASTPtr) : DatabaseOrdinary( database_name_, metadata_path_, @@ -58,8 +59,9 @@ DatabaseMaterializedMySQL::DatabaseMaterializedMySQL( const String & mysql_database_name_, mysqlxx::Pool && pool_, MySQLClient && client_, - std::unique_ptr settings_) - : DatabaseAtomic(database_name_, metadata_path_, uuid, "DatabaseMaterializedMySQL (" + database_name_ + ")", context_) + std::unique_ptr settings_, + ASTPtr storage_def_) + : DatabaseAtomic(database_name_, metadata_path_, uuid, "DatabaseMaterializedMySQL (" + database_name_ + ")", context_, storage_def_) , settings(std::move(settings_)) , materialize_thread(context_, database_name_, mysql_database_name_, std::move(pool_), std::move(client_), settings.get()) { diff --git a/src/Databases/MySQL/DatabaseMaterializedMySQL.h b/src/Databases/MySQL/DatabaseMaterializedMySQL.h index 812a0fb64c8..7bfa310b4cf 100644 --- a/src/Databases/MySQL/DatabaseMaterializedMySQL.h +++ b/src/Databases/MySQL/DatabaseMaterializedMySQL.h @@ -25,7 +25,7 @@ public: DatabaseMaterializedMySQL( ContextPtr context, const String & database_name_, const String & metadata_path_, UUID uuid, const String & mysql_database_name_, mysqlxx::Pool && pool_, - MySQLClient && client_, std::unique_ptr settings_); + MySQLClient && client_, std::unique_ptr settings_, ASTPtr storage_def_); void rethrowExceptionIfNeed() const; diff --git a/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.cpp b/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.cpp index fdd181373df..b4b0037ff1b 100644 --- a/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.cpp +++ b/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.cpp @@ -10,12 +10,14 @@ #include #include #include +#include #include #include #include #include #include #include +#include #include #include #include @@ -30,20 +32,20 @@ namespace ErrorCodes { extern const int NOT_IMPLEMENTED; extern const int LOGICAL_ERROR; + extern const int QUERY_NOT_ALLOWED; } DatabaseMaterializedPostgreSQL::DatabaseMaterializedPostgreSQL( ContextPtr context_, const String & metadata_path_, UUID uuid_, - const ASTStorage * database_engine_define_, + ASTPtr storage_def_, bool is_attach_, const String & database_name_, const String & postgres_database_name, const postgres::ConnectionInfo & connection_info_, std::unique_ptr settings_) - : DatabaseAtomic(database_name_, metadata_path_, uuid_, 
"DatabaseMaterializedPostgreSQL (" + database_name_ + ")", context_) - , database_engine_define(database_engine_define_->clone()) + : DatabaseAtomic(database_name_, metadata_path_, uuid_, "DatabaseMaterializedPostgreSQL (" + database_name_ + ")", context_, storage_def_) , is_attach(is_attach_) , remote_database_name(postgres_database_name) , connection_info(connection_info_) @@ -66,11 +68,10 @@ void DatabaseMaterializedPostgreSQL::startSynchronization() /* is_materialized_postgresql_database = */ true, settings->materialized_postgresql_tables_list.value); - postgres::Connection connection(connection_info); NameSet tables_to_replicate; try { - tables_to_replicate = replication_handler->fetchRequiredTables(connection); + tables_to_replicate = replication_handler->fetchRequiredTables(); } catch (...) { @@ -89,12 +90,12 @@ void DatabaseMaterializedPostgreSQL::startSynchronization() if (storage) { /// Nested table was already created and synchronized. - storage = StorageMaterializedPostgreSQL::create(storage, getContext()); + storage = StorageMaterializedPostgreSQL::create(storage, getContext(), remote_database_name, table_name); } else { /// Nested table does not exist and will be created by replication thread. - storage = StorageMaterializedPostgreSQL::create(StorageID(database_name, table_name), getContext()); + storage = StorageMaterializedPostgreSQL::create(StorageID(database_name, table_name), getContext(), remote_database_name, table_name); } /// Cache MaterializedPostgreSQL wrapper over nested table. @@ -124,7 +125,34 @@ void DatabaseMaterializedPostgreSQL::loadStoredObjects(ContextMutablePtr local_c if (!force_attach) throw; } +} + +void DatabaseMaterializedPostgreSQL::checkAlterIsPossible(const AlterCommands & commands, ContextPtr) const +{ + for (const auto & command : commands) + { + if (command.type != AlterCommand::MODIFY_DATABASE_SETTING) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Alter of type '{}' is not supported by database engine {}", alterTypeToString(command.type), getEngineName()); + } +} + + +void DatabaseMaterializedPostgreSQL::applySettings(const SettingsChanges & settings_changes, ContextPtr local_context) +{ + for (const auto & change : settings_changes) + { + if (!settings->has(change.name)) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Database engine {} does not support setting `{}`", getEngineName(), change.name); + + if (change.name == "materialized_postgresql_tables_list") + { + if (local_context->isInternalQuery() || materialized_tables.empty()) + throw Exception(ErrorCodes::QUERY_NOT_ALLOWED, "Changin settings `{}` is allowed only internally. 
@@ -164,8 +192,38 @@ void DatabaseMaterializedPostgreSQL::createTable(ContextPtr local_context, const
         return;
     }

-    throw Exception(ErrorCodes::NOT_IMPLEMENTED,
-                    "Create table query allowed only for ReplacingMergeTree engine and from synchronization thread");
+    throw Exception(ErrorCodes::QUERY_NOT_ALLOWED, "CREATE TABLE is not allowed for database engine {}", getEngineName());
+}
+
+
+void DatabaseMaterializedPostgreSQL::attachTable(const String & table_name, const StoragePtr & table, const String & relative_table_path)
+{
+    if (CurrentThread::isInitialized() && CurrentThread::get().getQueryContext())
+    {
+        auto set = std::make_shared();
+        set->is_standalone = false;
+        auto tables_to_replicate = settings->materialized_postgresql_tables_list.value;
+        set->changes = {SettingChange("materialized_postgresql_tables_list", tables_to_replicate.empty() ? table_name : (tables_to_replicate + "," + table_name))};
+
+        auto command = std::make_shared();
+        command->type = ASTAlterCommand::Type::MODIFY_DATABASE_SETTING;
+        command->children.emplace_back(std::move(set));
+
+        auto expr = std::make_shared();
+        expr->children.push_back(command);
+
+        ASTAlterQuery alter;
+        alter.alter_object = ASTAlterQuery::AlterObjectType::DATABASE;
+        alter.children.emplace_back(std::move(expr));
+
+        auto storage = StorageMaterializedPostgreSQL::create(StorageID(database_name, table_name), getContext(), remote_database_name, table_name);
+        materialized_tables[table_name] = storage;
+        replication_handler->addTableToReplication(dynamic_cast(storage.get()), table_name);
+    }
+    else
+    {
+        DatabaseAtomic::attachTable(table_name, table, relative_table_path);
+    }
 }


@@ -209,6 +267,63 @@ DatabaseTablesIteratorPtr DatabaseMaterializedPostgreSQL::getTablesIterator(
     return DatabaseAtomic::getTablesIterator(StorageMaterializedPostgreSQL::makeNestedTableContext(local_context), filter_by_table_name);
 }

+static ASTPtr getColumnDeclaration(const DataTypePtr & data_type)
+{
+    WhichDataType which(data_type);
+
+    if (which.isNullable())
+        return makeASTFunction("Nullable", getColumnDeclaration(typeid_cast(data_type.get())->getNestedType()));
+
+    if (which.isArray())
+        return makeASTFunction("Array", getColumnDeclaration(typeid_cast(data_type.get())->getNestedType()));
+
+    return std::make_shared(data_type->getName());
+}
+
+
+ASTPtr DatabaseMaterializedPostgreSQL::getCreateTableQueryImpl(const String & table_name, ContextPtr local_context, bool throw_on_error) const
+{
+    if (!local_context->hasQueryContext())
+        return DatabaseAtomic::getCreateTableQueryImpl(table_name, local_context, throw_on_error);
+
+    /// Note: here we make an assumption that the table structure will not change between the call to this method and the call to attachTable().
+    auto storage = StorageMaterializedPostgreSQL::create(StorageID(database_name, table_name), getContext(), remote_database_name, table_name);
+    replication_handler->addStructureToMaterializedStorage(storage.get(), table_name);
+
+    auto create_table_query = std::make_shared();
+    auto table_storage_define = storage_def->clone();
+    create_table_query->set(create_table_query->storage, table_storage_define);
+
+    auto columns_declare_list = std::make_shared();
+    auto columns_expression_list = std::make_shared();
+
+    columns_declare_list->set(columns_declare_list->columns, columns_expression_list);
+    create_table_query->set(create_table_query->columns_list, columns_declare_list);
+
+    /// Init the CREATE query.
+ auto table_id = storage->getStorageID(); + create_table_query->table = table_id.table_name; + create_table_query->database = table_id.database_name; + + auto metadata_snapshot = storage->getInMemoryMetadataPtr(); + for (const auto & column_type_and_name : metadata_snapshot->getColumns().getAllPhysical()) + { + const auto & column_declaration = std::make_shared(); + column_declaration->name = column_type_and_name.name; + column_declaration->type = getColumnDeclaration(column_type_and_name.type); + columns_expression_list->children.emplace_back(column_declaration); + } + + ASTStorage * ast_storage = table_storage_define->as(); + ASTs storage_children = ast_storage->children; + auto storage_engine_arguments = ast_storage->engine->arguments; + /// Add table_name to engine arguments + storage_engine_arguments->children.insert(storage_engine_arguments->children.begin() + 2, std::make_shared(table_id.table_name)); + + return create_table_query; +} + + } diff --git a/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.h b/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.h index dd8b4dc438a..3c75af6cc9e 100644 --- a/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.h +++ b/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.h @@ -32,7 +32,7 @@ public: ContextPtr context_, const String & metadata_path_, UUID uuid_, - const ASTStorage * database_engine_define_, + ASTPtr storage_def_, bool is_attach_, const String & database_name_, const String & postgres_database_name, @@ -52,18 +52,26 @@ public: void createTable(ContextPtr context, const String & name, const StoragePtr & table, const ASTPtr & query) override; + void attachTable(const String & name, const StoragePtr & table, const String & relative_table_path) override; + void dropTable(ContextPtr local_context, const String & name, bool no_delay) override; void drop(ContextPtr local_context) override; void stopReplication(); + void checkAlterIsPossible(const AlterCommands & commands, ContextPtr context) const override; + + void applySettings(const SettingsChanges & settings_changes, ContextPtr context) override; + void shutdown() override; +protected: + ASTPtr getCreateTableQueryImpl(const String & table_name, ContextPtr local_context, bool throw_on_error) const override; + private: void startSynchronization(); - ASTPtr database_engine_define; bool is_attach; String remote_database_name; postgres::ConnectionInfo connection_info; diff --git a/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.cpp b/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.cpp index 1b77947264e..0495dd8723a 100644 --- a/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.cpp +++ b/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.cpp @@ -290,12 +290,20 @@ PostgreSQLTableStructure fetchPostgreSQLTableStructure( pqxx::ReplicationTransaction & tx, const String & postgres_table_name, bool use_nulls, bool with_primary_key, bool with_replica_identity_index); +template +PostgreSQLTableStructure fetchPostgreSQLTableStructure( + pqxx::nontransaction & tx, const String & postgres_table_name, bool use_nulls, + bool with_primary_key, bool with_replica_identity_index); + template std::unordered_set fetchPostgreSQLTablesList(pqxx::work & tx, const String & postgres_schema); template std::unordered_set fetchPostgreSQLTablesList(pqxx::ReadTransaction & tx, const String & postgres_schema); +template +std::unordered_set fetchPostgreSQLTablesList(pqxx::nontransaction & tx, const String & postgres_schema); + } #endif diff --git 
a/src/Interpreters/DatabaseCatalog.cpp b/src/Interpreters/DatabaseCatalog.cpp
index fd6b5b9a810..ee49333a332 100644
--- a/src/Interpreters/DatabaseCatalog.cpp
+++ b/src/Interpreters/DatabaseCatalog.cpp
@@ -241,7 +241,7 @@ DatabaseAndTable DatabaseCatalog::getTableImpl(
 #if USE_LIBPQXX
         if (!context_->isInternalQuery() && (db_and_table.first->getEngineName() == "MaterializedPostgreSQL"))
         {
-            db_and_table.second = std::make_shared(std::move(db_and_table.second), getContext());
+            auto nested_table_name = db_and_table.second->getStorageID().table_name;
+            db_and_table.second = std::make_shared(std::move(db_and_table.second), getContext(), "", nested_table_name);
         }
 #endif
diff --git a/src/Interpreters/InterpreterAlterQuery.cpp b/src/Interpreters/InterpreterAlterQuery.cpp
index 76e7afb7009..2b4c5f3a8fa 100644
--- a/src/Interpreters/InterpreterAlterQuery.cpp
+++ b/src/Interpreters/InterpreterAlterQuery.cpp
@@ -39,12 +39,25 @@ InterpreterAlterQuery::InterpreterAlterQuery(const ASTPtr & query_ptr_, ContextP
 {
 }

+
 BlockIO InterpreterAlterQuery::execute()
 {
-    BlockIO res;
     const auto & alter = query_ptr->as();
+    if (alter.alter_object == ASTAlterQuery::AlterObjectType::DATABASE)
+        return executeToDatabase(alter);
+    else if (alter.alter_object == ASTAlterQuery::AlterObjectType::TABLE)
+        return executeToTable(alter);
+    else if (alter.alter_object == ASTAlterQuery::AlterObjectType::LIVE_VIEW)
+        return executeToTable(alter);
+    throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown alter");
+}

+
+BlockIO InterpreterAlterQuery::executeToTable(const ASTAlterQuery & alter)
+{
+    BlockIO res;
+
     if (!alter.cluster.empty())
         return executeDDLQueryOnCluster(query_ptr, getContext(), getRequiredAccess());

@@ -78,7 +91,9 @@ BlockIO InterpreterAlterQuery::execute()
     {
         auto * command_ast = child->as();
         if (auto alter_command = AlterCommand::parse(command_ast))
+        {
             alter_commands.emplace_back(std::move(*alter_command));
+        }
         else if (auto partition_command = PartitionCommand::parse(command_ast))
         {
             partition_commands.emplace_back(std::move(*partition_command));
@@ -92,7 +107,9 @@ BlockIO InterpreterAlterQuery::execute()
             mutation_commands.emplace_back(std::move(*mut_command));
         }
         else if (auto live_view_command = LiveViewCommand::parse(command_ast))
+        {
             live_view_commands.emplace_back(std::move(*live_view_command));
+        }
         else
             throw Exception("Wrong parameter type in ALTER query", ErrorCodes::LOGICAL_ERROR);
     }
@@ -149,6 +166,30 @@ BlockIO InterpreterAlterQuery::execute()
 }


+BlockIO InterpreterAlterQuery::executeToDatabase(const ASTAlterQuery & alter)
+{
+    BlockIO res;
+    getContext()->checkAccess(getRequiredAccess());
+    DatabasePtr database = DatabaseCatalog::instance().getDatabase(alter.database);
+    AlterCommands alter_commands;
+
+    for (const auto & child : alter.command_list->children)
+    {
+        auto * command_ast = child->as();
+        if (auto alter_command = AlterCommand::parse(command_ast))
+            alter_commands.emplace_back(std::move(*alter_command));
+        else
+            throw Exception("Wrong parameter type in ALTER DATABASE query", ErrorCodes::LOGICAL_ERROR);
+    }
+
+    if (!alter_commands.empty())
+    {
+        database->checkAlterIsPossible(alter_commands, getContext());
+        alter_commands.apply(database, getContext());
+    }
+
+    return res;
+}
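For reference, the statement shape that this routing hands to executeToDatabase() is the new ALTER DATABASE form; a minimal sketch, assuming a database named pg_db and a Bool engine setting mirroring the handler's allow_automatic_update flag (the exact setting name is an assumption, not taken from this diff):

    ALTER DATABASE pg_db MODIFY SETTING materialized_postgresql_allow_automatic_update = 1;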
 AccessRightsElements InterpreterAlterQuery::getRequiredAccess() const
 {
     AccessRightsElements required_access;
@@ -343,6 +384,11 @@ AccessRightsElements InterpreterAlterQuery::getRequiredAccessForCommand(const AS
             required_access.emplace_back(AccessType::ALTER_RENAME_COLUMN, database, table, column_name());
             break;
         }
+        case ASTAlterCommand::MODIFY_DATABASE_SETTING:
+        {
+            required_access.emplace_back(AccessType::ALTER_DATABASE_SETTINGS, database, table);
+            break;
+        }
         case ASTAlterCommand::NO_TYPE: break;
     }

@@ -354,7 +400,7 @@ void InterpreterAlterQuery::extendQueryLogElemImpl(QueryLogElement & elem, const
     const auto & alter = ast->as();
     elem.query_kind = "Alter";
-    if (alter.command_list != nullptr)
+    if (alter.command_list != nullptr && alter.alter_object != ASTAlterQuery::AlterObjectType::DATABASE)
     {
         // Alter queries already have their target table inserted into `elem`.
         if (elem.query_tables.size() != 1)
diff --git a/src/Interpreters/InterpreterAlterQuery.h b/src/Interpreters/InterpreterAlterQuery.h
index ae9750b0b62..9494a400e7b 100644
--- a/src/Interpreters/InterpreterAlterQuery.h
+++ b/src/Interpreters/InterpreterAlterQuery.h
@@ -9,6 +9,7 @@ namespace DB

 class AccessRightsElements;
 class ASTAlterCommand;
+class ASTAlterQuery;


 /** Allows you add or remove a column in the table.
@@ -28,6 +29,10 @@ public:
 private:
     AccessRightsElements getRequiredAccess() const;

+    BlockIO executeToTable(const ASTAlterQuery & alter);
+
+    BlockIO executeToDatabase(const ASTAlterQuery & alter);
+
     ASTPtr query_ptr;
 };

diff --git a/src/Parsers/ASTAlterQuery.cpp b/src/Parsers/ASTAlterQuery.cpp
index 0e617ca7c21..d0adb2e799f 100644
--- a/src/Parsers/ASTAlterQuery.cpp
+++ b/src/Parsers/ASTAlterQuery.cpp
@@ -389,6 +389,11 @@ void ASTAlterCommand::formatImpl(
         settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "RESET SETTING " << (settings.hilite ? hilite_none : "");
         settings_resets->formatImpl(settings, state, frame);
     }
+    else if (type == ASTAlterCommand::MODIFY_DATABASE_SETTING)
+    {
+        settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "MODIFY SETTING " << (settings.hilite ? hilite_none : "");
+        settings_changes->formatImpl(settings, state, frame);
+    }
     else if (type == ASTAlterCommand::MODIFY_QUERY)
     {
         settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "MODIFY QUERY " << settings.nl_or_ws << (settings.hilite ? hilite_none : "");
@@ -474,6 +479,8 @@ void ASTAlterQuery::formatQueryImpl(const FormatSettings & settings, FormatState
         case AlterObjectType::LIVE_VIEW:
             settings.ostr << "ALTER LIVE VIEW ";
             break;
+        default:
+            break;
     }

     settings.ostr << (settings.hilite ?
hilite_none : ""); diff --git a/src/Parsers/ASTAlterQuery.h b/src/Parsers/ASTAlterQuery.h index dadba107ddc..d99bac80fed 100644 --- a/src/Parsers/ASTAlterQuery.h +++ b/src/Parsers/ASTAlterQuery.h @@ -68,6 +68,8 @@ public: NO_TYPE, LIVE_VIEW_REFRESH, + + MODIFY_DATABASE_SETTING, }; Type type = NO_TYPE; @@ -212,11 +214,12 @@ public: { TABLE, DATABASE, - LIVE_VIEW + LIVE_VIEW, + UNKNOWN, }; // bool is_live_view{false}; /// true for ALTER LIVE VIEW - AlterObjectType alter_object = AlterObjectType::TABLE; + AlterObjectType alter_object = AlterObjectType::UNKNOWN; ASTExpressionList * command_list = nullptr; diff --git a/src/Parsers/ParserAlterQuery.cpp b/src/Parsers/ParserAlterQuery.cpp index cb1796a70b5..e8e0db8c492 100644 --- a/src/Parsers/ParserAlterQuery.cpp +++ b/src/Parsers/ParserAlterQuery.cpp @@ -141,12 +141,14 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected { if (!parser_settings.parse(pos, command->settings_changes, expected)) return false; - command->type = ASTAlterCommand::MODIFY_SETTING; + command->type = ASTAlterCommand::MODIFY_DATABASE_SETTING; } else return false; break; } + default: + break; case ASTAlterQuery::AlterObjectType::TABLE: { if (s_add_column.ignore(pos, expected)) @@ -828,16 +830,27 @@ bool ParserAlterQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) else return false; - if (!parseDatabaseAndTableName(pos, expected, query->database, query->table)) - return false; - - String cluster_str; - if (ParserKeyword{"ON"}.ignore(pos, expected)) + if (alter_object_type == ASTAlterQuery::AlterObjectType::DATABASE) { - if (!ASTQueryWithOnCluster::parse(pos, cluster_str, expected)) + std::cerr << "\n\n\nOK!\n\n"; + if (!parseDatabase(pos, expected, query->database)) return false; + std::cerr << "database name: " << query->database << std::endl; + } + else + { + std::cerr << "\n\n\nNOT OK!\n\n"; + if (!parseDatabaseAndTableName(pos, expected, query->database, query->table)) + return false; + + String cluster_str; + if (ParserKeyword{"ON"}.ignore(pos, expected)) + { + if (!ASTQueryWithOnCluster::parse(pos, cluster_str, expected)) + return false; + } + query->cluster = cluster_str; } - query->cluster = cluster_str; ParserAlterCommandList p_command_list(alter_object_type); ASTPtr command_list; @@ -845,6 +858,8 @@ bool ParserAlterQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) return false; query->set(query->command_list, command_list); + query->alter_object = alter_object_type; + std::cerr << "\n\n\nalter query: " << query->dumpTree() << std::endl; return true; } diff --git a/src/Parsers/parseDatabaseAndTableName.cpp b/src/Parsers/parseDatabaseAndTableName.cpp index 13429df5b4d..c071f1b6eb4 100644 --- a/src/Parsers/parseDatabaseAndTableName.cpp +++ b/src/Parsers/parseDatabaseAndTableName.cpp @@ -42,6 +42,22 @@ bool parseDatabaseAndTableName(IParser::Pos & pos, Expected & expected, String & } +bool parseDatabase(IParser::Pos & pos, Expected & expected, String & database_str) +{ + ParserToken s_dot(TokenType::Dot); + ParserIdentifier identifier_parser; + + ASTPtr database; + database_str = ""; + + if (!identifier_parser.parse(pos, database, expected)) + return false; + + tryGetIdentifierNameInto(database, database_str); + return true; +} + + bool parseDatabaseAndTableNameOrAsterisks(IParser::Pos & pos, Expected & expected, String & database, bool & any_database, String & table, bool & any_table) { return IParserBase::wrapParseImpl(pos, [&] diff --git a/src/Parsers/parseDatabaseAndTableName.h 
b/src/Parsers/parseDatabaseAndTableName.h index e4699c8ad91..dc435ca047e 100644 --- a/src/Parsers/parseDatabaseAndTableName.h +++ b/src/Parsers/parseDatabaseAndTableName.h @@ -10,4 +10,6 @@ bool parseDatabaseAndTableName(IParser::Pos & pos, Expected & expected, String & /// Parses [db.]name or [db.]* or [*.]* bool parseDatabaseAndTableNameOrAsterisks(IParser::Pos & pos, Expected & expected, String & database, bool & any_database, String & table, bool & any_table); +bool parseDatabase(IParser::Pos & pos, Expected & expected, String & database_str); + } diff --git a/src/Storages/AlterCommands.cpp b/src/Storages/AlterCommands.cpp index 7388f44fee1..de9d9e594fd 100644 --- a/src/Storages/AlterCommands.cpp +++ b/src/Storages/AlterCommands.cpp @@ -312,6 +312,14 @@ std::optional AlterCommand::parse(const ASTAlterCommand * command_ command.settings_changes = command_ast->settings_changes->as().changes; return command; } + else if (command_ast->type == ASTAlterCommand::MODIFY_DATABASE_SETTING) + { + AlterCommand command; + command.ast = command_ast->clone(); + command.type = AlterCommand::MODIFY_DATABASE_SETTING; + command.settings_changes = command_ast->settings_changes->as().changes; + return command; + } else if (command_ast->type == ASTAlterCommand::RESET_SETTING) { AlterCommand command; @@ -350,6 +358,21 @@ std::optional AlterCommand::parse(const ASTAlterCommand * command_ } +void AlterCommands::apply(DatabasePtr database, ContextPtr context) const +{ + for (const AlterCommand & command : *this) + { + if (!command.ignore) + { + if (command.type == AlterCommand::MODIFY_DATABASE_SETTING) + database->modifySettings(command.settings_changes, context); + else + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unsupported alter command"); + } + } +} + + void AlterCommand::apply(StorageInMemoryMetadata & metadata, ContextPtr context) const { if (type == ADD_COLUMN) @@ -877,6 +900,8 @@ String alterTypeToString(const AlterCommand::Type type) return "MODIFY SETTING"; case AlterCommand::Type::RESET_SETTING: return "RESET SETTING"; + case AlterCommand::Type::MODIFY_DATABASE_SETTING: + return "MODIFY DATABASE SETTING"; case AlterCommand::Type::MODIFY_QUERY: return "MODIFY QUERY"; case AlterCommand::Type::RENAME_COLUMN: @@ -1007,6 +1032,7 @@ void AlterCommands::prepare(const StorageInMemoryMetadata & metadata) prepared = true; } + void AlterCommands::validate(const StorageInMemoryMetadata & metadata, ContextPtr context) const { auto all_columns = metadata.columns; diff --git a/src/Storages/AlterCommands.h b/src/Storages/AlterCommands.h index 60f4ad7d552..a2a1a3b6709 100644 --- a/src/Storages/AlterCommands.h +++ b/src/Storages/AlterCommands.h @@ -13,6 +13,8 @@ namespace DB { class ASTAlterCommand; +class IDatabase; +using DatabasePtr = std::shared_ptr; /// Operation from the ALTER query (except for manipulation with PART/PARTITION). /// Adding Nested columns is not expanded to add individual columns. @@ -42,6 +44,7 @@ struct AlterCommand MODIFY_QUERY, RENAME_COLUMN, REMOVE_TTL, + MODIFY_DATABASE_SETTING, }; /// Which property user wants to remove from column @@ -194,6 +197,8 @@ public: /// Commands have to be prepared before apply. void apply(StorageInMemoryMetadata & metadata, ContextPtr context) const; + void apply(DatabasePtr database, ContextPtr context) const; + /// At least one command modify settings. 
bool hasSettingsAlterCommand() const; diff --git a/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp b/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp index b43e7656084..f9c448e9a5d 100644 --- a/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp +++ b/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp @@ -26,8 +26,9 @@ MaterializedPostgreSQLConsumer::MaterializedPostgreSQLConsumer( const std::string & start_lsn, const size_t max_block_size_, bool allow_automatic_update_, - Storages storages_) - : log(&Poco::Logger::get("PostgreSQLReaplicaConsumer")) + Storages storages_, + const String & name_for_logger) + : log(&Poco::Logger::get("PostgreSQLReplicaConsumer("+ name_for_logger +")")) , context(context_) , replication_slot_name(replication_slot_name_) , publication_name(publication_name_) @@ -270,12 +271,13 @@ void MaterializedPostgreSQLConsumer::processReplicationMessage(const char * repl case 'I': // Insert { Int32 relation_id = readInt32(replication_message, pos, size); + const auto & table_name = relation_id_to_name[relation_id]; + assert(!table_name.empty()); - if (!isSyncAllowed(relation_id)) + if (!isSyncAllowed(relation_id, table_name)) return; Int8 new_tuple = readInt8(replication_message, pos, size); - const auto & table_name = relation_id_to_name[relation_id]; auto buffer = buffers.find(table_name); assert(buffer != buffers.end()); @@ -287,11 +289,12 @@ void MaterializedPostgreSQLConsumer::processReplicationMessage(const char * repl case 'U': // Update { Int32 relation_id = readInt32(replication_message, pos, size); + const auto & table_name = relation_id_to_name[relation_id]; + assert(!table_name.empty()); - if (!isSyncAllowed(relation_id)) + if (!isSyncAllowed(relation_id, table_name)) return; - const auto & table_name = relation_id_to_name[relation_id]; auto buffer = buffers.find(table_name); assert(buffer != buffers.end()); @@ -335,14 +338,15 @@ void MaterializedPostgreSQLConsumer::processReplicationMessage(const char * repl case 'D': // Delete { Int32 relation_id = readInt32(replication_message, pos, size); + const auto & table_name = relation_id_to_name[relation_id]; + assert(!table_name.empty()); - if (!isSyncAllowed(relation_id)) + if (!isSyncAllowed(relation_id, table_name)) return; /// 0 or 1 if replica identity is set to full. For now only default replica identity is supported (with primary keys). 
readInt8(replication_message, pos, size);

-            const auto & table_name = relation_id_to_name[relation_id];
             auto buffer = buffers.find(table_name);
             assert(buffer != buffers.end());
             readTupleData(buffer->second, replication_message, pos, size, PostgreSQLQuery::DELETE);
             break;
         }
@@ -357,7 +361,7 @@ void MaterializedPostgreSQLConsumer::processReplicationMessage(const char * repl
             constexpr size_t transaction_commit_timestamp_len = 8;
             pos += unused_flags_len + commit_lsn_len + transaction_end_lsn_len + transaction_commit_timestamp_len;

-            LOG_DEBUG(log, "Current lsn: {} = {}", current_lsn, getLSNValue(current_lsn)); /// Will be removed

             final_lsn = current_lsn;
             break;
         }
@@ -371,7 +375,7 @@ void MaterializedPostgreSQLConsumer::processReplicationMessage(const char * repl
             readString(replication_message, pos, size, relation_namespace);
             readString(replication_message, pos, size, relation_name);

-            if (!isSyncAllowed(relation_id))
+            if (!isSyncAllowed(relation_id, relation_name))
                 return;

             if (storages.find(relation_name) == storages.end())
@@ -522,15 +526,32 @@ String MaterializedPostgreSQLConsumer::advanceLSN(std::shared_ptr

-bool MaterializedPostgreSQLConsumer::isSyncAllowed(Int32 relation_id)
+bool MaterializedPostgreSQLConsumer::isSyncAllowed(Int32 relation_id, const String & relation_name)
 {
-    auto table_with_lsn = skip_list.find(relation_id);
+    auto new_table_with_lsn = waiting_list.find(relation_name);
+
+    if (new_table_with_lsn != waiting_list.end())
+    {
+        auto table_start_lsn = new_table_with_lsn->second;
+        assert(!table_start_lsn.empty());
+
+        if (getLSNValue(current_lsn) >= getLSNValue(table_start_lsn))
+        {
+            LOG_TRACE(log, "Synchronization is started for table: {} (start_lsn: {})", relation_name, table_start_lsn);
+            waiting_list.erase(new_table_with_lsn);
+            return true;
+        }
+    }
+
+    auto skipped_table_with_lsn = skip_list.find(relation_id);

     /// Table is not present in a skip list - allow synchronization.
-    if (table_with_lsn == skip_list.end())
+    if (skipped_table_with_lsn == skip_list.end())
         return true;

-    const auto & table_start_lsn = table_with_lsn->second;
+    const auto & table_start_lsn = skipped_table_with_lsn->second;

     /// Table is in a skip list and has not yet received a valid lsn == it has not been reloaded.
     if (table_start_lsn.empty())
@@ -544,7 +565,7 @@ bool MaterializedPostgreSQLConsumer::isSyncAllowed(Int32 relation_id)
         LOG_TRACE(log, "Synchronization is resumed for table: {} (start_lsn: {})",
                   relation_id_to_name[relation_id], table_start_lsn);

-        skip_list.erase(table_with_lsn);
+        skip_list.erase(skipped_table_with_lsn);

         return true;
     }
@@ -576,6 +597,34 @@ void MaterializedPostgreSQLConsumer::markTableAsSkipped(Int32 relation_id, const
 }


+void MaterializedPostgreSQLConsumer::addNested(const String & postgres_table_name, StoragePtr nested_storage, const String & table_start_lsn)
+{
+    /// Cache new pointer to ReplacingMergeTree table.
+    storages.emplace(postgres_table_name, nested_storage);
+
+    /// Add new in-memory buffer.
+    buffers.emplace(postgres_table_name, Buffer(nested_storage));
+
+    /// The replication consumer will read the WAL and check, for the currently processed table,
+    /// whether it is allowed to start applying changes to it.
+    waiting_list[postgres_table_name] = table_start_lsn;
+}
+
+
+void MaterializedPostgreSQLConsumer::updateNested(const String & table_name, StoragePtr nested_storage, Int32 table_id, const String & table_start_lsn)
+{
+    /// Cache new pointer to ReplacingMergeTree table.
+    storages[table_name] = nested_storage;
+
+    /// Create a new empty buffer (with updated metadata), where data is first loaded before syncing into actual table.
+    auto & buffer = buffers.find(table_name)->second;
+    buffer.createEmptyBuffer(nested_storage);
+
+    /// Set start position to a valid lsn. Before that it was an empty string. Further reads for this table are allowed once it has a valid lsn.
+    skip_list[table_id] = table_start_lsn;
+}
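A side note on the LSN arithmetic above: getLSNValue() imposes the same total order that PostgreSQL itself uses for WAL positions, which can be illustrated with the pg_lsn type (the values are arbitrary):

    SELECT '0/16B6C50'::pg_lsn < '0/16B6D80'::pg_lsn;  -- true: the first position is earlier in the WAL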
 /// Read binary changes from replication slot via COPY command (starting from current lsn in a slot).
 bool MaterializedPostgreSQLConsumer::readFromReplicationSlot()
 {
@@ -625,9 +674,8 @@ bool MaterializedPostgreSQLConsumer::readFromReplicationSlot()
             tryLogCurrentException(__PRETTY_FUNCTION__);
             return false;
         }
-        catch (const pqxx::broken_connection & e)
+        catch (const pqxx::broken_connection &)
         {
-            LOG_ERROR(log, "Connection error: {}", e.what());
             connection->tryUpdateConnection();
             return false;
         }
@@ -641,6 +689,7 @@ bool MaterializedPostgreSQLConsumer::readFromReplicationSlot()
             if (error_message.find("out of relcache_callback_list slots") == std::string::npos)
                 tryLogCurrentException(__PRETTY_FUNCTION__);

+            connection->tryUpdateConnection();
             return false;
         }
         catch (const pqxx::conversion_error & e)
@@ -704,17 +753,4 @@ bool MaterializedPostgreSQLConsumer::consume(std::vector
-
-void MaterializedPostgreSQLConsumer::updateNested(const String & table_name, StoragePtr nested_storage, Int32 table_id, const String & table_start_lsn)
-{
-    /// Cache new pointer to replacingMergeTree table.
-    storages[table_name] = nested_storage;
-
-    /// Create a new empty buffer (with updated metadata), where data is first loaded before syncing into actual table.
-    auto & buffer = buffers.find(table_name)->second;
-    buffer.createEmptyBuffer(nested_storage);
-
-    /// Set start position to valid lsn. Before it was an empty string. Further read for table allowed, if it has a valid lsn.
-    skip_list[table_id] = table_start_lsn;
-}
-
 }
diff --git a/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.h b/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.h
index 8f3224784f1..327c6866760 100644
--- a/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.h
+++ b/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.h
@@ -27,7 +27,8 @@ public:
         const String & start_lsn,
         const size_t max_block_size_,
         bool allow_automatic_update_,
-        Storages storages_);
+        Storages storages_,
+        const String & name_for_logger);

     bool consume(std::vector> & skipped_tables);

@@ -35,6 +36,8 @@ public:
     /// process if it was skipped due to schema changes.
     void updateNested(const String & table_name, StoragePtr nested_storage, Int32 table_id, const String & table_start_lsn);

+    void addNested(const String & postgres_table_name, StoragePtr nested_storage, const String & table_start_lsn);
+
 private:
     /// Read approximately up to max_block_size changes from WAL.
     bool readFromReplicationSlot();
@@ -45,7 +48,7 @@ private:

     void processReplicationMessage(const char * replication_message, size_t size);

-    bool isSyncAllowed(Int32 relation_id);
+    bool isSyncAllowed(Int32 relation_id, const String & relation_name);

     struct Buffer
     {
@@ -111,9 +114,12 @@ private:
     String table_to_insert;

     /// List of tables which need to be synced after last replication stream.
+    /// Holds `postgres_table_name` set.
     std::unordered_set tables_to_sync;

+    /// `postgres_table_name` -> ReplacingMergeTree table.
     Storages storages;
+    /// `postgres_table_name` -> In-memory buffer.
     Buffers buffers;

     std::unordered_map relation_id_to_name;
@@ -133,6 +139,7 @@ private:
     /// if relation definition has changed since the last relation definition message.
     std::unordered_map schema_data;

+    /// `postgres_relation_id` -> `start_lsn`
     /// skip_list contains relation ids for tables on which ddl was performed, which can break synchronization.
     /// These breaking changes are detected in the replication stream via the corresponding replication message, and the table is added to the skip list.
     /// After it is finished, a temporary replication slot is created with 'export snapshot' option, and start_lsn is returned.
     /// No message needed by the reloaded table will be missed, because messages are not consumed in the meantime,
     /// i.e. we will not miss the first start_lsn position for the reloaded table.
     std::unordered_map skip_list;
+
+    /// `postgres_table_name` -> `start_lsn`
+    /// For dynamically added tables. A new table is loaded via snapshot and we get a start lsn position.
+    /// Once consumer reaches this position, it starts applying replication messages to this table.
+    /// Inside the replication handler we have to ensure that the replication consumer does not read data from the WAL
+    /// until the process of adding a table to replication has finished,
+    /// because we might go beyond this start lsn position before the consumer knows that a new table was added.
+    std::unordered_map waiting_list;
 };
 }
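The waiting_list declared above is what makes dynamic table addition safe. The user-facing trigger for it is expected to be a plain ATTACH on the MaterializedPostgreSQL database (names here are hypothetical), which routes through the attachTable() override shown earlier:

    ATTACH TABLE pg_db.new_table;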
diff --git a/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp b/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp
index 3477397adb7..55864305c8f 100644
--- a/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp
+++ b/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp
@@ -11,18 +11,20 @@
 #include
 #include
 #include
+#include


 namespace DB
 {

-static const auto RESCHEDULE_MS = 500;
+static const auto RESCHEDULE_MS = 1000;
 static const auto BACKOFF_TRESHOLD_MS = 10000;

 namespace ErrorCodes
 {
     extern const int LOGICAL_ERROR;
     extern const int BAD_ARGUMENTS;
+    extern const int POSTGRESQL_REPLICATION_INTERNAL_ERROR;
 }

 PostgreSQLReplicationHandler::PostgreSQLReplicationHandler(
@@ -36,7 +38,9 @@ PostgreSQLReplicationHandler::PostgreSQLReplicationHandler(
     bool allow_automatic_update_,
     bool is_materialized_postgresql_database_,
     const String tables_list_)
-    : log(&Poco::Logger::get("PostgreSQLReplicationHandler"))
+    : log(&Poco::Logger::get("PostgreSQLReplicationHandler("
+                + (is_materialized_postgresql_database_ ? remote_database_name_ : remote_database_name_ + '.' + tables_list_)
+                + ")"))
     , context(context_)
     , is_attach(is_attach_)
     , remote_database_name(remote_database_name_)
@@ -46,7 +50,6 @@ PostgreSQLReplicationHandler::PostgreSQLReplicationHandler(
     , allow_automatic_update(allow_automatic_update_)
     , is_materialized_postgresql_database(is_materialized_postgresql_database_)
     , tables_list(tables_list_)
-    , connection(std::make_shared(connection_info_))
     , milliseconds_to_wait(RESCHEDULE_MS)
 {
     replication_slot = fmt::format("{}_ch_replication_slot", replication_identifier);
@@ -73,7 +76,8 @@ void PostgreSQLReplicationHandler::waitConnectionAndStart()
 {
     try
     {
-        connection->connect(); /// Will throw pqxx::broken_connection if no connection at the moment
+        postgres::Connection connection(connection_info);
+        connection.connect(); /// Will throw pqxx::broken_connection if no connection at the moment
         startSynchronization(false);
     }
     catch (const pqxx::broken_connection & pqxx_error)
@@ -98,14 +102,9 @@ void PostgreSQLReplicationHandler::shutdown()

 void PostgreSQLReplicationHandler::startSynchronization(bool throw_on_error)
 {
-    {
-        pqxx::work tx(connection->getRef());
-        createPublicationIfNeeded(tx);
-        tx.commit();
-    }
-
     postgres::Connection replication_connection(connection_info, /* replication */true);
     pqxx::nontransaction tx(replication_connection.getRef());
+    createPublicationIfNeeded(tx);

     /// List of nested tables (table_name -> nested_storage), which is passed to replication consumer.
     std::unordered_map nested_storages;
@@ -116,18 +115,21 @@ void PostgreSQLReplicationHandler::startSynchronization(bool throw_on_error)
     /// 2. if replication slot already exists, start_lsn is read from pg_replication_slots as
     /// `confirmed_flush_lsn` - the address (LSN) up to which the logical slot's consumer has confirmed receiving data.
     /// Data older than this is not available anymore.
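The confirmed_flush_lsn mentioned in this comment can be inspected directly on the PostgreSQL side; given the {}_ch_replication_slot naming used by this handler, something along these lines:

    SELECT slot_name, confirmed_flush_lsn
    FROM pg_replication_slots
    WHERE slot_name LIKE '%_ch_replication_slot';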
- /// TODO: more tests String snapshot_name, start_lsn; auto initial_sync = [&]() { createReplicationSlot(tx, start_lsn, snapshot_name); + /// Loading tables from snapshot requires a certain transaction type, so we need to open a new transaction. + /// But we cannot have more than one open transaction on the same connection. Therefore we have + /// a separate connection to load tables. postgres::Connection tmp_connection(connection_info); for (const auto & [table_name, storage] : materialized_storages) { try { - nested_storages[table_name] = loadFromSnapshot(snapshot_name, table_name, storage->as ()); + nested_storages[table_name] = loadFromSnapshot(tmp_connection, snapshot_name, table_name, storage->as ()); } catch (Exception & e) { @@ -164,6 +166,7 @@ void PostgreSQLReplicationHandler::startSynchronization(bool throw_on_error) auto * materialized_storage = storage->as (); try { + /// TODO: THIS IS INCORRECT, we might get here if there is no nested, need to check and reload. /// Try load nested table, set materialized table metadata. nested_storages[table_name] = materialized_storage->prepare(); } @@ -187,13 +190,14 @@ void PostgreSQLReplicationHandler::startSynchronization(bool throw_on_error) /// Handler uses it only for loadFromSnapshot and shutdown methods. consumer = std::make_shared( context, - connection, + std::make_shared(connection_info), replication_slot, publication_name, start_lsn, max_block_size, allow_automatic_update, - nested_storages); + nested_storages, + (is_materialized_postgresql_database ? remote_database_name : remote_database_name + '.' + tables_list)); consumer_task->activateAndSchedule(); @@ -202,10 +206,31 @@ } -StoragePtr PostgreSQLReplicationHandler::loadFromSnapshot(String & snapshot_name, const String & table_name, +void PostgreSQLReplicationHandler::addStructureToMaterializedStorage(StorageMaterializedPostgreSQL * storage, const String & table_name, ASTPtr database_def) +{ + postgres::Connection connection(connection_info); + pqxx::nontransaction tx(connection.getRef()); + auto table_structure = std::make_unique(fetchPostgreSQLTableStructure(tx, table_name, true, true, true)); + + auto engine = std::make_shared(); + engine->name = "MaterializedPostgreSQL"; + engine->arguments = args; + + auto ast_storage = std::make_shared(); + storage->set(storage->engine, engine); + + auto storage_def = storage->getCreateNestedTableQuery(std::move(table_structure)); + ContextMutablePtr local_context = Context::createCopy(context); + auto table = createTableFromAST(*assert_cast(storage_def.get()), remote_database_name, "", local_context, false).second; + + storage->setInMemoryMetadata(table->getInMemoryMetadata()); +} + + +StoragePtr PostgreSQLReplicationHandler::loadFromSnapshot(postgres::Connection & connection, String & snapshot_name, const String & table_name, StorageMaterializedPostgreSQL * materialized_storage) { - auto tx = std::make_shared(connection->getRef()); + auto tx = std::make_shared(connection.getRef()); std::string query_str = fmt::format("SET TRANSACTION SNAPSHOT '{}'", snapshot_name); tx->exec(query_str); @@ -290,7 +315,7 @@ void PostgreSQLReplicationHandler::consumerFunc() } -bool PostgreSQLReplicationHandler::isPublicationExist(pqxx::work & tx) +bool PostgreSQLReplicationHandler::isPublicationExist(pqxx::nontransaction & tx) { std::string query_str = fmt::format("SELECT exists (SELECT 1 FROM pg_publication WHERE pubname = '{}')", publication_name); pqxx::result
result{tx.exec(query_str)}; @@ -299,7 +324,7 @@ bool PostgreSQLReplicationHandler::isPublicationExist(pqxx::work & tx) } -void PostgreSQLReplicationHandler::createPublicationIfNeeded(pqxx::work & tx) +void PostgreSQLReplicationHandler::createPublicationIfNeeded(pqxx::nontransaction & tx) { auto publication_exists = isPublicationExist(tx); @@ -310,7 +335,7 @@ void PostgreSQLReplicationHandler::createPublicationIfNeeded(pqxx::work & tx) "Publication {} already exists, but it is a CREATE query, not ATTACH. Publication will be dropped", publication_name); - connection->execWithRetry([&](pqxx::nontransaction & tx_){ dropPublication(tx_); }); + dropPublication(tx); } if (!is_attach || !publication_exists) @@ -389,7 +414,7 @@ void PostgreSQLReplicationHandler::createReplicationSlot( pqxx::result result{tx.exec(query_str)}; start_lsn = result[0][1].as(); snapshot_name = result[0][2].as(); - LOG_TRACE(log, "Created replication slot: {}, start lsn: {}", replication_slot, start_lsn); + LOG_TRACE(log, "Created replication slot: {}, start lsn: {}, snapshot: {}", replication_slot, start_lsn, snapshot_name); } catch (Exception & e) { @@ -422,22 +447,39 @@ void PostgreSQLReplicationHandler::dropPublication(pqxx::nontransaction & tx) } +void PostgreSQLReplicationHandler::addTableToPublication(pqxx::nontransaction & ntx, const String & table_name) +{ + std::string query_str = fmt::format("ALTER PUBLICATION {} ADD TABLE ONLY {}", publication_name, doubleQuoteString(table_name)); + ntx.exec(query_str); + LOG_TRACE(log, "Added table `{}` to publication `{}`", table_name, publication_name); +} + + +void PostgreSQLReplicationHandler::removeTableFromPublication(pqxx::nontransaction & ntx, const String & table_name) +{ + std::string query_str = fmt::format("ALTER PUBLICATION {} DROP TABLE ONLY {}", publication_name, doubleQuoteString(table_name)); + ntx.exec(query_str); + LOG_TRACE(log, "Removed table `{}` from publication `{}`", table_name, publication_name); +} + + void PostgreSQLReplicationHandler::shutdownFinal() { try { shutdown(); - connection->execWithRetry([&](pqxx::nontransaction & tx){ dropPublication(tx); }); + postgres::Connection connection(connection_info); + connection.execWithRetry([&](pqxx::nontransaction & tx){ dropPublication(tx); }); String last_committed_lsn; - connection->execWithRetry([&](pqxx::nontransaction & tx) + connection.execWithRetry([&](pqxx::nontransaction & tx) { if (isReplicationSlotExist(tx, last_committed_lsn, /* temporary */false)) dropReplicationSlot(tx, /* temporary */false); }); - connection->execWithRetry([&](pqxx::nontransaction & tx) + connection.execWithRetry([&](pqxx::nontransaction & tx) { if (isReplicationSlotExist(tx, last_committed_lsn, /* temporary */true)) dropReplicationSlot(tx, /* temporary */true); @@ -453,12 +495,17 @@ void PostgreSQLReplicationHandler::shutdownFinal() /// Used by MaterializedPostgreSQL database engine. 
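// ---- Editor's note (added example, not part of the patch) ----
// shutdownFinal() above leans on postgres::Connection::execWithRetry, whose body is
// outside this patch. A hypothetical sketch of such a wrapper, under the assumption
// that it opens a non-transaction, runs the callback, and retries once when the
// connection breaks (re-establishing the session is elided for brevity):

#include <pqxx/pqxx>
#include <functional>

static void execWithRetrySketch(pqxx::connection & conn, const std::function<void(pqxx::nontransaction &)> & exec)
{
    for (int attempt = 0; attempt < 2; ++attempt)
    {
        try
        {
            pqxx::nontransaction tx(conn);
            exec(tx); // run the caller's statements, e.g. dropping a slot or publication
            return;
        }
        catch (const pqxx::broken_connection &)
        {
            if (attempt == 1)
                throw; // retry budget exhausted; let the caller log the failure
        }
    }
}
// ---- end editor's note ----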
-NameSet PostgreSQLReplicationHandler::fetchRequiredTables(postgres::Connection & connection_) +NameSet PostgreSQLReplicationHandler::fetchRequiredTables() { - pqxx::work tx(connection_.getRef()); + postgres::Connection connection(connection_info); NameSet result_tables; + bool publication_exists_before_startup; + + { + pqxx::nontransaction tx(connection.getRef()); + publication_exists_before_startup = isPublicationExist(tx); + } - bool publication_exists_before_startup = isPublicationExist(tx); LOG_DEBUG(log, "Publication exists: {}, is attach: {}", publication_exists_before_startup, is_attach); Strings expected_tables; @@ -479,7 +526,7 @@ NameSet PostgreSQLReplicationHandler::fetchRequiredTables(postgres::Connection & "Publication {} already exists, but it is a CREATE query, not ATTACH. Publication will be dropped", publication_name); - connection->execWithRetry([&](pqxx::nontransaction & tx_){ dropPublication(tx_); }); + connection.execWithRetry([&](pqxx::nontransaction & tx_){ dropPublication(tx_); }); } else { @@ -489,13 +536,20 @@ NameSet PostgreSQLReplicationHandler::fetchRequiredTables(postgres::Connection & "Publication {} already exists and tables list is empty. Assuming publication is correct.", publication_name); - result_tables = fetchPostgreSQLTablesList(tx, postgres_schema); + { + pqxx::nontransaction tx(connection.getRef()); + result_tables = fetchPostgreSQLTablesList(tx, postgres_schema); + } } /// Check tables list from publication is the same as expected tables list. /// If not - drop publication and return expected tables list. else { - result_tables = fetchTablesFromPublication(tx); + { + pqxx::work tx(connection.getRef()); + result_tables = fetchTablesFromPublication(tx); + } + NameSet diff; std::set_symmetric_difference(expected_tables.begin(), expected_tables.end(), result_tables.begin(), result_tables.end(), @@ -514,7 +568,7 @@ NameSet PostgreSQLReplicationHandler::fetchRequiredTables(postgres::Connection & "Publication {} already exists, but specified tables list differs from publication tables list in tables: {}.", publication_name, diff_tables); - connection->execWithRetry([&](pqxx::nontransaction & tx_){ dropPublication(tx_); }); + connection.execWithRetry([&](pqxx::nontransaction & tx_){ dropPublication(tx_); }); } } } @@ -531,11 +585,13 @@ NameSet PostgreSQLReplicationHandler::fetchRequiredTables(postgres::Connection & /// Fetch all tables list from database. Publication does not exist yet, which means /// that no replication took place. Publication will be created in /// startSynchronization method. - result_tables = fetchPostgreSQLTablesList(tx, postgres_schema); + { + pqxx::nontransaction tx(connection.getRef()); + result_tables = fetchPostgreSQLTablesList(tx, postgres_schema); + } } } - tx.commit(); return result_tables; } @@ -562,6 +618,57 @@ PostgreSQLTableStructurePtr PostgreSQLReplicationHandler::fetchTableStructure( } +void PostgreSQLReplicationHandler::addTableToReplication(StorageMaterializedPostgreSQL * materialized_storage, const String & postgres_table_name) +{ + /// Note: we have to ensure that replication consumer task is stopped when we reload table, because otherwise + /// it can read WAL beyond start lsn position (from which this table is being loaded), which will result in losing data. + /// Therefore we wait here for it to finish current reading stream. We have to wait, because we cannot return OK to client right now.
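// ---- Editor's note (added example, not part of the patch) ----
// The fetchRequiredTables() hunk above compares the expected tables list with the
// list stored in the publication via std::set_symmetric_difference, which requires
// both input ranges to be sorted. A standalone illustration of the semantics:

#include <algorithm>
#include <iostream>
#include <iterator>
#include <set>
#include <string>

int main()
{
    std::set<std::string> expected{"a", "b", "c"};    // e.g. from materialized_postgresql_tables_list
    std::set<std::string> publication{"b", "c", "d"}; // e.g. from the existing publication

    std::set<std::string> diff;
    std::set_symmetric_difference(
        expected.begin(), expected.end(),
        publication.begin(), publication.end(),
        std::inserter(diff, diff.begin()));

    for (const auto & table : diff)
        std::cout << table << '\n'; // prints "a" and "d" - the tables present on one side only
}
// ---- end editor's note ----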
+ consumer_task->deactivate(); + + try + { + postgres::Connection replication_connection(connection_info, /* replication */true); + String snapshot_name, start_lsn; + StoragePtr nested_storage; + + { + pqxx::nontransaction tx(replication_connection.getRef()); + auto table_structure = std::make_unique(fetchPostgreSQLTableStructure(tx, postgres_table_name, true, true, true)); + + if (isReplicationSlotExist(tx, start_lsn, /* temporary */true)) + dropReplicationSlot(tx, /* temporary */true); + createReplicationSlot(tx, start_lsn, snapshot_name, /* temporary */true); + + { + postgres::Connection tmp_connection(connection_info); + nested_storage = loadFromSnapshot(tmp_connection, snapshot_name, postgres_table_name, materialized_storage->as ()); + } + auto nested_table_id = nested_storage->getStorageID(); + materialized_storage->setNestedStorageID(nested_table_id); + nested_storage = materialized_storage->prepare(); + } + + { + pqxx::nontransaction tx(replication_connection.getRef()); + addTableToPublication(tx, postgres_table_name); + } + + /// Pass storage to consumer and lsn position, from which to start receiving replication messages for this table. + consumer->addNested(postgres_table_name, nested_storage, start_lsn); + } + catch (...) + { + consumer_task->scheduleAfter(RESCHEDULE_MS); + + auto error_message = getCurrentExceptionMessage(false); + throw Exception(ErrorCodes::POSTGRESQL_REPLICATION_INTERNAL_ERROR, + "Failed to add table `{}` to replication. Info: {}", postgres_table_name, error_message); + } + + consumer_task->schedule(); +} + + void PostgreSQLReplicationHandler::reloadFromSnapshot(const std::vector> & relation_data) { /// If table schema has changed, the table stops consuming changes from replication stream. @@ -579,6 +686,7 @@ void PostgreSQLReplicationHandler::reloadFromSnapshot(const std::vectorcreateTemporary(); /// This snapshot is valid up to the end of the transaction, which exported it. - StoragePtr temp_nested_storage = loadFromSnapshot(snapshot_name, table_name, + StoragePtr temp_nested_storage = loadFromSnapshot(tmp_connection, snapshot_name, table_name, temp_materialized_storage->as ()); auto table_id = materialized_storage->getNestedStorageID(); diff --git a/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.h b/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.h index 3a0bedc0852..37ea6b2cbea 100644 --- a/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.h +++ b/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.h @@ -8,11 +8,6 @@ namespace DB { -/// IDEA: There is ALTER PUBLICATION command to dynamically add and remove tables for replicating (the command is transactional). -/// (Probably, if in a replication stream comes a relation name, which does not currently -/// exist in CH, it can be loaded via snapshot while stream is stopped and then comparing wal positions with -/// current lsn and table start lsn. - class StorageMaterializedPostgreSQL; class PostgreSQLReplicationHandler @@ -43,24 +38,32 @@ public: void addStorage(const std::string & table_name, StorageMaterializedPostgreSQL * storage); /// Fetch list of tables which are going to be replicated. Used for database engine. - NameSet fetchRequiredTables(postgres::Connection & connection_); + NameSet fetchRequiredTables(); /// Start replication setup immediately. 
void startSynchronization(bool throw_on_error); + void addTableToReplication(StorageMaterializedPostgreSQL * storage, const String & postgres_table_name); + + void addStructureToMaterializedStorage(StorageMaterializedPostgreSQL * storage, const String & table_name); + private: using MaterializedStorages = std::unordered_map; /// Methods to manage Publication. - bool isPublicationExist(pqxx::work & tx); + bool isPublicationExist(pqxx::nontransaction & tx); - void createPublicationIfNeeded(pqxx::work & tx); + void createPublicationIfNeeded(pqxx::nontransaction & tx); NameSet fetchTablesFromPublication(pqxx::work & tx); void dropPublication(pqxx::nontransaction & ntx); + void addTableToPublication(pqxx::nontransaction & ntx, const String & table_name); + + void removeTableFromPublication(pqxx::nontransaction & ntx, const String & table_name); + /// Methods to manage Replication Slots. bool isReplicationSlotExist(pqxx::nontransaction & tx, String & start_lsn, bool temporary = false); @@ -75,7 +78,7 @@ private: void consumerFunc(); - StoragePtr loadFromSnapshot(std::string & snapshot_name, const String & table_name, StorageMaterializedPostgreSQL * materialized_storage); + StoragePtr loadFromSnapshot(postgres::Connection & connection, std::string & snapshot_name, const String & table_name, StorageMaterializedPostgreSQL * materialized_storage); void reloadFromSnapshot(const std::vector> & relation_data); @@ -110,9 +113,6 @@ private: String replication_slot, publication_name; - /// Shared between replication_consumer and replication_handler, but never accessed concurrently. - std::shared_ptr connection; /// Replication consumer. Manages decoding of replication stream and syncing into tables. std::shared_ptr consumer; diff --git a/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp b/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp index e24e252bf01..5ff00f9babb 100644 --- a/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp +++ b/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp @@ -51,6 +51,7 @@ StorageMaterializedPostgreSQL::StorageMaterializedPostgreSQL( std::unique_ptr replication_settings) : IStorage(table_id_) , WithContext(context_->getGlobalContext()) + , log(&Poco::Logger::get("StorageMaterializedPostgreSQL(" + postgres::formatNameForLogs(remote_database_name, remote_table_name) + ")")) , is_materialized_postgresql_database(false) , has_nested(false) , nested_context(makeNestedTableContext(context_->getGlobalContext())) @@ -72,19 +73,26 @@ StorageMaterializedPostgreSQL::StorageMaterializedPostgreSQL( getContext(), is_attach, replication_settings->materialized_postgresql_max_block_size.value, - /* allow_automatic_update */ false, /* is_materialized_postgresql_database */false); + /* allow_automatic_update */ false, /* is_materialized_postgresql_database */false, + remote_table_name_); } /// For the case of MaterializedPostgreSQL database engine. /// It is used when the nested ReplacingMergeTree table has not yet been created by the replication thread. /// In this case this storage can't be used for read queries.
-StorageMaterializedPostgreSQL::StorageMaterializedPostgreSQL(const StorageID & table_id_, ContextPtr context_) +StorageMaterializedPostgreSQL::StorageMaterializedPostgreSQL( + const StorageID & table_id_, + ContextPtr context_, + const String & postgres_database_name, + const String & postgres_table_name) : IStorage(table_id_) , WithContext(context_->getGlobalContext()) + , log(&Poco::Logger::get("StorageMaterializedPostgreSQL(" + postgres::formatNameForLogs(postgres_database_name, postgres_table_name) + ")")) , is_materialized_postgresql_database(true) , has_nested(false) , nested_context(makeNestedTableContext(context_->getGlobalContext())) + , nested_table_id(table_id_) { } @@ -92,9 +100,14 @@ StorageMaterializedPostgreSQL::StorageMaterializedPostgreSQL(const StorageID & t /// Constructor for MaterializedPostgreSQL table engine - for the case of MaterializedPostgreSQL database engine. /// It is used when the nested ReplacingMergeTree table has already been created by the replication thread. /// This storage is ready to handle read queries. -StorageMaterializedPostgreSQL::StorageMaterializedPostgreSQL(StoragePtr nested_storage_, ContextPtr context_) +StorageMaterializedPostgreSQL::StorageMaterializedPostgreSQL( + StoragePtr nested_storage_, + ContextPtr context_, + const String & postgres_database_name, + const String & postgres_table_name) : IStorage(nested_storage_->getStorageID()) , WithContext(context_->getGlobalContext()) + , log(&Poco::Logger::get("StorageMaterializedPostgreSQL(" + postgres::formatNameForLogs(postgres_database_name, postgres_table_name) + ")")) , is_materialized_postgresql_database(true) , has_nested(true) , nested_context(makeNestedTableContext(context_->getGlobalContext())) @@ -120,7 +133,7 @@ StoragePtr StorageMaterializedPostgreSQL::createTemporary() const } auto new_context = Context::createCopy(context); - return StorageMaterializedPostgreSQL::create(tmp_table_id, new_context); + return StorageMaterializedPostgreSQL::create(tmp_table_id, new_context, "", table_id.table_name); } @@ -163,6 +176,7 @@ void StorageMaterializedPostgreSQL::createNestedIfNeeded(PostgreSQLTableStructur const auto ast_create = getCreateNestedTableQuery(std::move(table_structure)); auto table_id = getStorageID(); auto tmp_nested_table_id = StorageID(table_id.database_name, getNestedTableName()); + LOG_DEBUG(log, "Creating ClickHouse table for PostgreSQL table {}", table_id.getNameForLogs()); try { diff --git a/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.h b/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.h index becb4f6ba10..62bc6ed713f 100644 --- a/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.h +++ b/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.h @@ -49,7 +49,6 @@ namespace DB * * All database methods, apart from tryGetTable(), are devoted only to nested table. * NOTE: It makes sense to allow rename method for MaterializedPostgreSQL table via database method. - * TODO: Make sure replication-to-table data channel is done only by relation_id.
* * Also main table has the same InMemoryMetadata as its nested table, so if metadata of nested table changes - main table also has * to update its metadata, because all read requests are passed to MaterializedPostgreSQL table and then it redirects read @@ -57,7 +56,7 @@ namespace DB * * When there is a need to update table structure, a new MaterializedPostgreSQL table will be created with its own nested table, * it will have updated table schema and all data will be loaded from scratch in the background, while the previous table with outdated table - * structure will still serve read requests. When data is loaded, nested tables will be swapped, metadata of metarialzied table will be + * structure will still serve read requests. When data is loaded, nested tables will be swapped, metadata of materialized table will be * updated according to nested table. * **/ @@ -67,9 +66,11 @@ class StorageMaterializedPostgreSQL final : public shared_ptr_helper; public: - StorageMaterializedPostgreSQL(const StorageID & table_id_, ContextPtr context_); + StorageMaterializedPostgreSQL(const StorageID & table_id_, ContextPtr context_, + const String & postgres_database_name, const String & postgres_table_name); - StorageMaterializedPostgreSQL(StoragePtr nested_storage_, ContextPtr context_); + StorageMaterializedPostgreSQL(StoragePtr nested_storage_, ContextPtr context_, + const String & postgres_database_name, const String & postgres_table_name); String getName() const override { return "MaterializedPostgreSQL"; } @@ -123,6 +124,8 @@ public: bool supportsFinal() const override { return true; } + ASTPtr getCreateNestedTableQuery(PostgreSQLTableStructurePtr table_structure); + protected: StorageMaterializedPostgreSQL( const StorageID & table_id_, @@ -140,10 +143,10 @@ private: ASTPtr getColumnDeclaration(const DataTypePtr & data_type) const; - ASTPtr getCreateNestedTableQuery(PostgreSQLTableStructurePtr table_structure); - String getNestedTableName() const; + Poco::Logger * log; + /// Not nullptr only for single MaterializedPostgreSQL storage, because for MaterializedPostgreSQL /// database engine there is one replication handler for all tables. std::unique_ptr replication_handler; diff --git a/src/Storages/StorageInMemoryMetadata.h b/src/Storages/StorageInMemoryMetadata.h index 9accdb9b3b6..66ad2b0254c 100644 --- a/src/Storages/StorageInMemoryMetadata.h +++ b/src/Storages/StorageInMemoryMetadata.h @@ -41,7 +41,7 @@ struct StorageInMemoryMetadata TTLColumnsDescription column_ttls_by_name; /// TTL expressions for table (Move and Rows) TTLTableDescription table_ttl; - /// SETTINGS expression. Supported for MergeTree, Buffer and Kafka. + /// SETTINGS expression. Supported for MergeTree, Buffer, Kafka, RabbitMQ. ASTPtr settings_changes; /// SELECT QUERY. Supported for MaterializedView and View (have to support LiveView).
SelectQueryDescription select; From 174340074cbd8181179e1bdd9f4141aeb8881e62 Mon Sep 17 00:00:00 2001 From: kssenii Date: Fri, 27 Aug 2021 15:50:45 +0300 Subject: [PATCH 07/80] Dynamically add tables complete --- src/Common/ErrorCodes.cpp | 11 +- src/Core/PostgreSQL/Connection.cpp | 3 +- src/Core/PostgreSQL/Utils.cpp | 7 +- src/Databases/DatabaseAtomic.cpp | 7 +- .../DatabaseMaterializedPostgreSQL.cpp | 148 +++++++++--------- .../DatabaseMaterializedPostgreSQL.h | 4 + src/Interpreters/DatabaseCatalog.cpp | 5 +- src/Interpreters/InterpreterAlterQuery.cpp | 8 +- src/Parsers/ASTAlterQuery.cpp | 5 + src/Parsers/ASTAlterQuery.h | 1 - src/Parsers/ParserAlterQuery.cpp | 4 - .../MaterializedPostgreSQLConsumer.cpp | 8 +- .../PostgreSQLReplicationHandler.cpp | 30 ++-- .../PostgreSQL/PostgreSQLReplicationHandler.h | 5 +- .../StorageMaterializedPostgreSQL.cpp | 10 +- .../test.py | 82 ++++++++++ 16 files changed, 213 insertions(+), 125 deletions(-) diff --git a/src/Common/ErrorCodes.cpp b/src/Common/ErrorCodes.cpp index 36d0fafdb4c..681319efe5e 100644 --- a/src/Common/ErrorCodes.cpp +++ b/src/Common/ErrorCodes.cpp @@ -577,9 +577,14 @@ M(606, BACKUP_IS_EMPTY) \ M(607, BACKUP_ELEMENT_DUPLICATE) \ M(608, CANNOT_RESTORE_TABLE) \ - M(609, POSTGRESQL_CONNECTION_FAILURE) \ - M(610, POSTGRESQL_REPLICATION_INTERNAL_ERROR) \ - M(611, QUERY_NOT_ALLOWED) \ + M(609, FUNCTION_ALREADY_EXISTS) \ + M(610, CANNOT_DROP_SYSTEM_FUNCTION) \ + M(611, CANNOT_CREATE_RECURSIVE_FUNCTION) \ + M(612, OBJECT_ALREADY_STORED_ON_DISK) \ + M(613, OBJECT_WAS_NOT_STORED_ON_DISK) \ + M(614, POSTGRESQL_CONNECTION_FAILURE) \ + M(615, POSTGRESQL_REPLICATION_INTERNAL_ERROR) \ + M(616, QUERY_NOT_ALLOWED) \ \ M(999, KEEPER_EXCEPTION) \ M(1000, POCO_EXCEPTION) \ diff --git a/src/Core/PostgreSQL/Connection.cpp b/src/Core/PostgreSQL/Connection.cpp index 2cb52fcff81..8e127afb1db 100644 --- a/src/Core/PostgreSQL/Connection.cpp +++ b/src/Core/PostgreSQL/Connection.cpp @@ -1,9 +1,8 @@ #include "Connection.h" #if USE_LIBPQXX - #include -#include + namespace postgres { diff --git a/src/Core/PostgreSQL/Utils.cpp b/src/Core/PostgreSQL/Utils.cpp index 98cd706be69..accc3b29a93 100644 --- a/src/Core/PostgreSQL/Utils.cpp +++ b/src/Core/PostgreSQL/Utils.cpp @@ -21,11 +21,12 @@ ConnectionInfo formatConnectionString(String dbname, String host, UInt16 port, S String formatNameForLogs(const String & postgres_database_name, const String & postgres_table_name) { - if (postgres_database_name.empty()) - return postgres_table_name; + /// Logger for StorageMaterializedPostgreSQL - both db and table names. + /// Logger for PostgreSQLReplicationHandler and Consumer - either both db and table names or only db name. + assert(!postgres_database_name.empty()); if (postgres_table_name.empty()) return postgres_database_name; - return fmt::format("{}.{}", postgres_database_name, postgres_table_name); + return postgres_database_name + '.' 
+ postgres_table_name; } } diff --git a/src/Databases/DatabaseAtomic.cpp b/src/Databases/DatabaseAtomic.cpp index 8ce17198d3d..5102c924140 100644 --- a/src/Databases/DatabaseAtomic.cpp +++ b/src/Databases/DatabaseAtomic.cpp @@ -578,11 +578,11 @@ void DatabaseAtomic::modifySettings(const SettingsChanges & settings_changes, Co create.uuid = getUUID(); create.if_not_exists = false; create.storage = assert_cast(storage_def.get()); - auto * ast_set_query = create.storage->settings; + auto * settings = create.storage->settings; - if (ast_set_query) + if (settings) { - auto & previous_settings = ast_set_query->changes; + auto & previous_settings = settings->changes; for (const auto & change : settings_changes) { auto it = std::find_if(previous_settings.begin(), previous_settings.end(), @@ -614,7 +614,6 @@ void DatabaseAtomic::modifySettings(const SettingsChanges & settings_changes, Co fs::path metadata_file_tmp_path = fs::path(metadata_root_path) / "metadata" / (database_name_escaped + ".sql.tmp"); fs::path metadata_file_path = fs::path(metadata_root_path) / "metadata" / (database_name_escaped + ".sql"); - /// Exclusive flag guarantees, that database is not created right now in another thread. WriteBufferFromFile out(metadata_file_tmp_path, statement.size(), O_WRONLY | O_CREAT | O_EXCL); writeString(statement, out); diff --git a/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.cpp b/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.cpp index b4b0037ff1b..4575a1d7270 100644 --- a/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.cpp +++ b/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.cpp @@ -5,6 +5,9 @@ #include #include +#include +#include +#include #include #include #include @@ -21,9 +24,6 @@ #include #include #include -#include -#include - namespace DB { @@ -145,10 +145,20 @@ void DatabaseMaterializedPostgreSQL::applySettings(const SettingsChanges & setti if (!settings->has(change.name)) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Database engine {} does not support setting `{}`", getEngineName(), change.name); - if (change.name == "materialized_postgresql_tables_list") + if ((change.name == "materialized_postgresql_tables_list")) { - if (local_context->isInternalQuery() || materialized_tables.empty()) - throw Exception(ErrorCodes::QUERY_NOT_ALLOWED, "Changin settings `{}` is allowed only internally. Use CREATE TABLE query", change.name); + if (!local_context->isInternalQuery()) + throw Exception(ErrorCodes::QUERY_NOT_ALLOWED, "Changing setting `{}` is not allowed", change.name); + } + else if (change.name == "materialized_postgresql_allow_automatic_update") + { + } + else if (change.name == "materialized_postgresql_max_block_size") + { + } + else + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown setting"); } settings->applyChange(change); @@ -192,7 +202,44 @@ void DatabaseMaterializedPostgreSQL::createTable(ContextPtr local_context, const return; } - throw Exception(ErrorCodes::QUERY_NOT_ALLOWED, "CREATE TABLE is not allowed for database engine {}", getEngineName()); + const auto & create = query->as(); + if (!create->attach) + throw Exception(ErrorCodes::QUERY_NOT_ALLOWED, "CREATE TABLE is not allowed for database engine {}. Use ATTACH TABLE instead", getEngineName()); + + /// Create ReplacingMergeTree table. 
+ auto query_copy = query->clone(); + auto create_query = assert_cast(query_copy.get()); + create_query->attach = false; + create_query->attach_short_syntax = false; + DatabaseAtomic::createTable(StorageMaterializedPostgreSQL::makeNestedTableContext(local_context), table_name, table, query_copy); + + /// Attach MaterializedPostgreSQL table. + attachTable(table_name, table, {}); +} + + +String DatabaseMaterializedPostgreSQL::getTablesList() const +{ + String tables_list; + for (const auto & table : materialized_tables) + { + if (!tables_list.empty()) + tables_list += ','; + tables_list += table.first; + } + return tables_list; +} + + +ASTPtr DatabaseMaterializedPostgreSQL::getCreateTableQueryImpl(const String & table_name, ContextPtr local_context, bool throw_on_error) const +{ + if (!local_context->hasQueryContext()) + return DatabaseAtomic::getCreateTableQueryImpl(table_name, local_context, throw_on_error); + + auto storage = StorageMaterializedPostgreSQL::create(StorageID(database_name, table_name), getContext(), remote_database_name, table_name); + auto ast_storage = replication_handler->getCreateNestedTableQuery(storage.get(), table_name); + assert_cast(ast_storage.get())->uuid = UUIDHelpers::generateV4(); + return ast_storage; } @@ -202,21 +249,34 @@ void DatabaseMaterializedPostgreSQL::attachTable(const String & table_name, cons { auto set = std::make_shared(); set->is_standalone = false; + auto tables_to_replicate = settings->materialized_postgresql_tables_list.value; - set->changes = {SettingChange("materialized_postgresql_tables_list", tables_to_replicate.empty() ? table_name : (tables_to_replicate + "," + table_name))}; + if (tables_to_replicate.empty()) + tables_to_replicate = getTablesList(); + + /// tables_to_replicate can be empty if postgres database had no tables when this database was created. + set->changes = {SettingChange("materialized_postgresql_tables_list", + tables_to_replicate.empty() ? 
table_name : (tables_to_replicate + "," + table_name))}; auto command = std::make_shared(); command->type = ASTAlterCommand::Type::MODIFY_DATABASE_SETTING; - command->children.emplace_back(std::move(set)); + command->settings_changes = std::move(set); - auto expr = std::make_shared(); - expr->children.push_back(command); + auto command_list = std::make_shared(); + command_list->children.push_back(command); - ASTAlterQuery alter; - alter.alter_object = ASTAlterQuery::AlterObjectType::DATABASE; - alter.children.emplace_back(std::move(expr)); + auto query = std::make_shared(); + auto * alter = query->as(); - auto storage = StorageMaterializedPostgreSQL::create(StorageID(database_name, table_name), getContext(), remote_database_name, table_name); + alter->alter_object = ASTAlterQuery::AlterObjectType::DATABASE; + alter->database = database_name; + alter->set(alter->command_list, command_list); + + auto current_context = Context::createCopy(getContext()->getGlobalContext()); + current_context->setInternalQuery(true); + InterpreterAlterQuery(query, current_context).execute(); + + auto storage = StorageMaterializedPostgreSQL::create(table, getContext(), remote_database_name, table_name); materialized_tables[table_name] = storage; replication_handler->addTableToReplication(dynamic_cast(storage.get()), table_name); } @@ -267,64 +327,6 @@ DatabaseTablesIteratorPtr DatabaseMaterializedPostgreSQL::getTablesIterator( return DatabaseAtomic::getTablesIterator(StorageMaterializedPostgreSQL::makeNestedTableContext(local_context), filter_by_table_name); } -static ASTPtr getColumnDeclaration(const DataTypePtr & data_type) -{ - WhichDataType which(data_type); - - if (which.isNullable()) - return makeASTFunction("Nullable", getColumnDeclaration(typeid_cast(data_type.get())->getNestedType())); - - if (which.isArray()) - return makeASTFunction("Array", getColumnDeclaration(typeid_cast(data_type.get())->getNestedType())); - - return std::make_shared(data_type->getName()); -} - - -ASTPtr DatabaseMaterializedPostgreSQL::getCreateTableQueryImpl(const String & table_name, ContextPtr local_context, bool throw_on_error) const -{ - if (!local_context->hasQueryContext()) - return DatabaseAtomic::getCreateTableQueryImpl(table_name, local_context, throw_on_error); - - /// Note: here we make an assumption that table structure will not change between call to this method and to attachTable(). - auto storage = StorageMaterializedPostgreSQL::create(StorageID(database_name, table_name), getContext(), remote_database_name, table_name); - replication_handler->addStructureToMaterializedStorage(storage.get(), table_name); - - auto create_table_query = std::make_shared(); - auto table_storage_define = storage_def->clone(); - create_table_query->set(create_table_query->storage, table_storage_define); - - auto columns_declare_list = std::make_shared(); - auto columns_expression_list = std::make_shared(); - - columns_declare_list->set(columns_declare_list->columns, columns_expression_list); - create_table_query->set(create_table_query->columns_list, columns_declare_list); - - /// init create query. 
- auto table_id = storage->getStorageID(); - create_table_query->table = table_id.table_name; - create_table_query->database = table_id.database_name; - - auto metadata_snapshot = storage->getInMemoryMetadataPtr(); - for (const auto & column_type_and_name : metadata_snapshot->getColumns().getAllPhysical()) - { - const auto & column_declaration = std::make_shared(); - column_declaration->name = column_type_and_name.name; - column_declaration->type = getColumnDeclaration(column_type_and_name.type); - columns_expression_list->children.emplace_back(column_declaration); - } - - ASTStorage * ast_storage = table_storage_define->as(); - ASTs storage_children = ast_storage->children; - auto storage_engine_arguments = ast_storage->engine->arguments; - /// Add table_name to engine arguments - storage_engine_arguments->children.insert(storage_engine_arguments->children.begin() + 2, std::make_shared(table_id.table_name)); - - return create_table_query; -} - - - } #endif diff --git a/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.h b/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.h index 3c75af6cc9e..8b47b45bd75 100644 --- a/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.h +++ b/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.h @@ -66,12 +66,16 @@ public: void shutdown() override; + String getPostgreSQLDatabaseName() const { return remote_database_name; } + protected: ASTPtr getCreateTableQueryImpl(const String & table_name, ContextPtr local_context, bool throw_on_error) const override; private: void startSynchronization(); + String getTablesList() const; + bool is_attach; String remote_database_name; postgres::ConnectionInfo connection_info; diff --git a/src/Interpreters/DatabaseCatalog.cpp b/src/Interpreters/DatabaseCatalog.cpp index ee49333a332..104d2a2e5bf 100644 --- a/src/Interpreters/DatabaseCatalog.cpp +++ b/src/Interpreters/DatabaseCatalog.cpp @@ -30,6 +30,7 @@ #if USE_LIBPQXX # include +# include #endif namespace fs = std::filesystem; @@ -241,7 +242,9 @@ DatabaseAndTable DatabaseCatalog::getTableImpl( #if USE_LIBPQXX if (!context_->isInternalQuery() && (db_and_table.first->getEngineName() == "MaterializedPostgreSQL")) { - db_and_table.second = std::make_shared(std::move(db_and_table.second), getContext(), "", db_and_table.second->getStorageID().table_name); + db_and_table.second = std::make_shared(std::move(db_and_table.second), getContext(), + assert_cast(db_and_table.first.get())->getPostgreSQLDatabaseName(), + db_and_table.second->getStorageID().table_name); } #endif diff --git a/src/Interpreters/InterpreterAlterQuery.cpp b/src/Interpreters/InterpreterAlterQuery.cpp index 2b4c5f3a8fa..0a40275b4bb 100644 --- a/src/Interpreters/InterpreterAlterQuery.cpp +++ b/src/Interpreters/InterpreterAlterQuery.cpp @@ -43,14 +43,12 @@ InterpreterAlterQuery::InterpreterAlterQuery(const ASTPtr & query_ptr_, ContextP BlockIO InterpreterAlterQuery::execute() { const auto & alter = query_ptr->as(); - std::cerr << "\n\n\n" << query_ptr->dumpTree() << std::endl; if (alter.alter_object == ASTAlterQuery::AlterObjectType::DATABASE) return executeToDatabase(alter); - else if (alter.alter_object == ASTAlterQuery::AlterObjectType::DATABASE) + else if (alter.alter_object == ASTAlterQuery::AlterObjectType::TABLE + || alter.alter_object == ASTAlterQuery::AlterObjectType::LIVE_VIEW) return executeToTable(alter); - else if (alter.alter_object == ASTAlterQuery::AlterObjectType::LIVE_VIEW) - return executeToTable(alter); - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown alter"); + throw 
Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown alter object type"); } diff --git a/src/Parsers/ASTAlterQuery.cpp b/src/Parsers/ASTAlterQuery.cpp index d0adb2e799f..80a0be6b704 100644 --- a/src/Parsers/ASTAlterQuery.cpp +++ b/src/Parsers/ASTAlterQuery.cpp @@ -494,6 +494,11 @@ void ASTAlterQuery::formatQueryImpl(const FormatSettings & settings, FormatState } settings.ostr << indent_str << backQuoteIfNeed(table); } + else if (alter_object == AlterObjectType::DATABASE && !database.empty()) + { + settings.ostr << indent_str << backQuoteIfNeed(database); + } + formatOnCluster(settings); settings.ostr << settings.nl_or_ws; diff --git a/src/Parsers/ASTAlterQuery.h b/src/Parsers/ASTAlterQuery.h index d99bac80fed..4f5cff5e0e0 100644 --- a/src/Parsers/ASTAlterQuery.h +++ b/src/Parsers/ASTAlterQuery.h @@ -218,7 +218,6 @@ public: UNKNOWN, }; - // bool is_live_view{false}; /// true for ALTER LIVE VIEW AlterObjectType alter_object = AlterObjectType::UNKNOWN; ASTExpressionList * command_list = nullptr; diff --git a/src/Parsers/ParserAlterQuery.cpp b/src/Parsers/ParserAlterQuery.cpp index e8e0db8c492..da9c533f5cc 100644 --- a/src/Parsers/ParserAlterQuery.cpp +++ b/src/Parsers/ParserAlterQuery.cpp @@ -832,14 +832,11 @@ bool ParserAlterQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (alter_object_type == ASTAlterQuery::AlterObjectType::DATABASE) { - std::cerr << "\n\n\nOK!\n\n"; if (!parseDatabase(pos, expected, query->database)) return false; - std::cerr << "database name: " << query->database << std::endl; } else { - std::cerr << "\n\n\nNOT OK!\n\n"; if (!parseDatabaseAndTableName(pos, expected, query->database, query->table)) return false; @@ -859,7 +856,6 @@ bool ParserAlterQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) query->set(query->command_list, command_list); query->alter_object = alter_object_type; - std::cerr << "\n\n\nalter query: " << query->dumpTree() << std::endl; return true; } diff --git a/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp b/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp index f9c448e9a5d..62ed7953c72 100644 --- a/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp +++ b/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp @@ -361,8 +361,6 @@ void MaterializedPostgreSQLConsumer::processReplicationMessage(const char * repl constexpr size_t transaction_commit_timestamp_len = 8; pos += unused_flags_len + commit_lsn_len + transaction_end_lsn_len + transaction_commit_timestamp_len; - // LOG_DEBUG(log, "Current lsn: {} = {}", current_lsn, getLSNValue(current_lsn)); /// Will be removed - final_lsn = current_lsn; break; } @@ -606,7 +604,7 @@ void MaterializedPostgreSQLConsumer::addNested(const String & postgres_table_nam buffers.emplace(postgres_table_name, Buffer(nested_storage)); /// Replication consumer will read WAL and check for the currently processed table whether it is allowed to start applying - /// changed to this table. + /// changes to this table.
waiting_list[postgres_table_name] = table_start_lsn; } @@ -749,7 +747,9 @@ bool MaterializedPostgreSQLConsumer::consume(std::vector(fetchPostgreSQLTableStructure(tx, table_name, true, true, true)); - - auto engine = std::make_shared(); - engine->name = "MaterializedPostgreSQL"; - engine->arguments = args; - - auto ast_storage = std::make_shared(); - storage->set(storage->engine, engine); - - auto storage_def = storage->getCreateNestedTableQuery(std::move(table_structure)); - ContextMutablePtr local_context = Context::createCopy(context); - auto table = createTableFromAST(*assert_cast(storage_def.get()), remote_database_name, "", local_context, false).second; - - storage->setInMemoryMetadata(table->getInMemoryMetadata()); + return storage->getCreateNestedTableQuery(std::move(table_structure)); } @@ -622,26 +610,28 @@ void PostgreSQLReplicationHandler::addTableToReplication(StorageMaterializedPost { /// Note: we have to ensure that replication consumer task is stopped when we reload table, because otherwise /// it can read wal beyond start lsn position (from which this table is being loaded), which will result in loosing data. - /// Therefore we wait here for it to finish current reading stream. We have to wait, because we cannot return OK to client right now. consumer_task->deactivate(); - try { + LOG_TRACE(log, "Adding table `{}` to replication", postgres_table_name); postgres::Connection replication_connection(connection_info, /* replication */true); String snapshot_name, start_lsn; StoragePtr nested_storage; { pqxx::nontransaction tx(replication_connection.getRef()); - auto table_structure = std::make_unique(fetchPostgreSQLTableStructure(tx, postgres_table_name, true, true, true)); - if (isReplicationSlotExist(tx, start_lsn, /* temporary */true)) dropReplicationSlot(tx, /* temporary */true); createReplicationSlot(tx, start_lsn, snapshot_name, /* temporary */true); + /// Protect against deadlock. + auto nested = DatabaseCatalog::instance().tryGetTable(materialized_storage->getNestedStorageID(), materialized_storage->getNestedTableContext()); + if (!nested) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Internal table was not created"); + { postgres::Connection tmp_connection(connection_info); - nested_storage = loadFromSnapshot(tmp_connection, snapshot_name, postgres_table_name, materialized_storage->as ()); + nested_storage = loadFromSnapshot(tmp_connection, snapshot_name, postgres_table_name, materialized_storage); } auto nested_table_id = nested_storage->getStorageID(); materialized_storage->setNestedStorageID(nested_table_id); @@ -655,6 +645,7 @@ void PostgreSQLReplicationHandler::addTableToReplication(StorageMaterializedPost /// Pass storage to consumer and lsn position, from which to start receiving replication messages for this table. consumer->addNested(postgres_table_name, nested_storage, start_lsn); + LOG_TRACE(log, "Table `{}` successfully added to replication", postgres_table_name); } catch (...) { @@ -664,7 +655,6 @@ void PostgreSQLReplicationHandler::addTableToReplication(StorageMaterializedPost throw Exception(ErrorCodes::POSTGRESQL_REPLICATION_INTERNAL_ERROR, "Failed to add table `{}` to replication. 
Info: {}", postgres_table_name, error_message); } - consumer_task->schedule(); } diff --git a/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.h b/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.h index 37ea6b2cbea..a0c78b5d425 100644 --- a/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.h +++ b/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.h @@ -3,6 +3,7 @@ #include "MaterializedPostgreSQLConsumer.h" #include #include +#include namespace DB @@ -43,9 +44,9 @@ public: /// Start replication setup immediately. void startSynchronization(bool throw_on_error); - void addTableToReplication(StorageMaterializedPostgreSQL * storage, const String & postgres_table_name); + ASTPtr getCreateNestedTableQuery(StorageMaterializedPostgreSQL * storage, const String & table_name); - void addStructureToMaterializedStorage(StorageMaterializedPostgreSQL * storage, const String & table_name); + void addTableToReplication(StorageMaterializedPostgreSQL * storage, const String & postgres_table_name); private: using MaterializedStorages = std::unordered_map; diff --git a/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp b/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp index 5ff00f9babb..1bf3c5ce1ca 100644 --- a/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp +++ b/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp @@ -173,6 +173,9 @@ StorageID StorageMaterializedPostgreSQL::getNestedStorageID() const void StorageMaterializedPostgreSQL::createNestedIfNeeded(PostgreSQLTableStructurePtr table_structure) { + if (tryGetNested()) + return; + const auto ast_create = getCreateNestedTableQuery(std::move(table_structure)); auto table_id = getStorageID(); auto tmp_nested_table_id = StorageID(table_id.database_name, getNestedTableName()); @@ -477,9 +480,10 @@ void registerStorageMaterializedPostgreSQL(StorageFactory & factory) postgresql_replication_settings->loadFromQuery(*args.storage_def); if (engine_args.size() != 5) - throw Exception("Storage MaterializedPostgreSQL requires 5 parameters: " - "PostgreSQL('host:port', 'database', 'table', 'username', 'password'", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Storage MaterializedPostgreSQL requires 5 parameters: " + "PostgreSQL('host:port', 'database', 'table', 'username', 'password'. 
Got {}", + engine_args.size()); for (auto & engine_arg : engine_args) engine_arg = evaluateConstantExpressionOrIdentifierAsLiteral(engine_arg, args.getContext()); diff --git a/tests/integration/test_postgresql_replica_database_engine/test.py b/tests/integration/test_postgresql_replica_database_engine/test.py index 40324089b1b..4d407579b52 100644 --- a/tests/integration/test_postgresql_replica_database_engine/test.py +++ b/tests/integration/test_postgresql_replica_database_engine/test.py @@ -923,6 +923,88 @@ def test_abrupt_server_restart_while_heavy_replication(started_cluster): cursor.execute('drop table if exists postgresql_replica_{};'.format(i)) +def test_add_new_table_to_replication(started_cluster): + drop_materialized_db() + conn = get_postgres_conn(ip=started_cluster.postgres_ip, + port=started_cluster.postgres_port, + database=True) + cursor = conn.cursor() + NUM_TABLES = 5 + + for i in range(NUM_TABLES): + create_postgres_table(cursor, 'postgresql_replica_{}'.format(i)); + instance.query("INSERT INTO postgres_database.postgresql_replica_{} SELECT number, {} from numbers(10000)".format(i, i)) + + create_materialized_db(ip=started_cluster.postgres_ip, + port=started_cluster.postgres_port) + + for i in range(NUM_TABLES): + table_name = 'postgresql_replica_{}'.format(i) + check_tables_are_synchronized(table_name); + + result = instance.query("SHOW TABLES FROM test_database") + assert(result == "postgresql_replica_0\npostgresql_replica_1\npostgresql_replica_2\npostgresql_replica_3\npostgresql_replica_4\n") + + table_name = 'postgresql_replica_5' + create_postgres_table(cursor, table_name) + instance.query("INSERT INTO postgres_database.{} SELECT number, number from numbers(10000)".format(table_name)) + + result = instance.query('SHOW CREATE DATABASE test_database') + assert(result[:63] == "CREATE DATABASE test_database\\nENGINE = MaterializedPostgreSQL(") # Check without ip + assert(result[-59:] == "\\'postgres_database\\', \\'postgres\\', \\'mysecretpassword\\')\n") + + result = instance.query_and_get_error("ALTER DATABASE test_database MODIFY SETTING materialized_postgresql_tables_list='tabl1'") + assert('Changing setting `materialized_postgresql_tables_list` is not allowed' in result) + + result = instance.query_and_get_error("ALTER DATABASE test_database MODIFY SETTING materialized_postgresql_tables='tabl1'") + assert('Database engine MaterializedPostgreSQL does not support setting' in result) + + instance.query("ATTACH TABLE test_database.{}".format(table_name)); + + result = instance.query("SHOW TABLES FROM test_database") + assert(result == "postgresql_replica_0\npostgresql_replica_1\npostgresql_replica_2\npostgresql_replica_3\npostgresql_replica_4\npostgresql_replica_5\n") + + check_tables_are_synchronized(table_name); + instance.query("INSERT INTO postgres_database.{} SELECT number, number from numbers(10000, 10000)".format(table_name)) + check_tables_are_synchronized(table_name); + + result = instance.query_and_get_error("ATTACH TABLE test_database.{}".format(table_name)); + assert('Table test_database.postgresql_replica_5 already exists' in result) + + result = instance.query_and_get_error("ATTACH TABLE test_database.unknown_table"); + assert('PostgreSQL table unknown_table does not exist' in result) + + result = instance.query('SHOW CREATE DATABASE test_database') + assert(result[:63] == "CREATE DATABASE test_database\\nENGINE = MaterializedPostgreSQL(") + assert(result[-180:] == ")\\nSETTINGS materialized_postgresql_tables_list = 
\\'postgresql_replica_0,postgresql_replica_1,postgresql_replica_2,postgresql_replica_3,postgresql_replica_4,postgresql_replica_5\\'\n") + + table_name = 'postgresql_replica_6' + create_postgres_table(cursor, table_name) + instance.query("INSERT INTO postgres_database.{} SELECT number, number from numbers(10000)".format(table_name)) + instance.query("ATTACH TABLE test_database.{}".format(table_name)); + + instance.restart_clickhouse() + + table_name = 'postgresql_replica_7' + create_postgres_table(cursor, table_name) + instance.query("INSERT INTO postgres_database.{} SELECT number, number from numbers(10000)".format(table_name)) + instance.query("ATTACH TABLE test_database.{}".format(table_name)); + + result = instance.query('SHOW CREATE DATABASE test_database') + assert(result[:63] == "CREATE DATABASE test_database\\nENGINE = MaterializedPostgreSQL(") + assert(result[-222:] == ")\\nSETTINGS materialized_postgresql_tables_list = \\'postgresql_replica_0,postgresql_replica_1,postgresql_replica_2,postgresql_replica_3,postgresql_replica_4,postgresql_replica_5,postgresql_replica_6,postgresql_replica_7\\'\n") + + result = instance.query("SHOW TABLES FROM test_database") + assert(result == "postgresql_replica_0\npostgresql_replica_1\npostgresql_replica_2\npostgresql_replica_3\npostgresql_replica_4\npostgresql_replica_5\npostgresql_replica_6\npostgresql_replica_7\n") + + for i in range(NUM_TABLES + 3): + table_name = 'postgresql_replica_{}'.format(i) + check_tables_are_synchronized(table_name); + + for i in range(NUM_TABLES + 3): + cursor.execute('drop table if exists postgresql_replica_{};'.format(i)) + + if __name__ == '__main__': cluster.start() input("Cluster created, press any key to destroy...") From a2b0996ac398ebb66c0cb514c242fc7ff06b612c Mon Sep 17 00:00:00 2001 From: kssenii Date: Sat, 28 Aug 2021 16:42:36 +0300 Subject: [PATCH 08/80] Dynamically remove tables --- .../DatabaseMaterializedPostgreSQL.cpp | 102 ++++++++++++++---- .../DatabaseMaterializedPostgreSQL.h | 10 +- .../MaterializedPostgreSQLConsumer.cpp | 11 ++ .../MaterializedPostgreSQLConsumer.h | 5 + .../PostgreSQLReplicationHandler.cpp | 27 +++++ .../PostgreSQL/PostgreSQLReplicationHandler.h | 2 + .../test.py | 59 ++++++++++ 7 files changed, 192 insertions(+), 24 deletions(-) diff --git a/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.cpp b/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.cpp index 4575a1d7270..7d378355d64 100644 --- a/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.cpp +++ b/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.cpp @@ -33,6 +33,7 @@ namespace ErrorCodes extern const int NOT_IMPLEMENTED; extern const int LOGICAL_ERROR; extern const int QUERY_NOT_ALLOWED; + extern const int UNKNOWN_TABLE; } DatabaseMaterializedPostgreSQL::DatabaseMaterializedPostgreSQL( @@ -218,13 +219,17 @@ void DatabaseMaterializedPostgreSQL::createTable(ContextPtr local_context, const } -String DatabaseMaterializedPostgreSQL::getTablesList() const +String DatabaseMaterializedPostgreSQL::getTablesList(const String & except) const { String tables_list; for (const auto & table : materialized_tables) { + if (table.first == except) + continue; + if (!tables_list.empty()) tables_list += ','; + tables_list += table.first; } return tables_list; @@ -243,38 +248,45 @@ ASTPtr DatabaseMaterializedPostgreSQL::getCreateTableQueryImpl(const String & ta } +ASTPtr DatabaseMaterializedPostgreSQL::createAlterSettingsQuery(const SettingChange & new_setting) +{ + auto set = std::make_shared(); + set->is_standalone = 
false; + set->changes = {new_setting}; + + auto command = std::make_shared(); + command->type = ASTAlterCommand::Type::MODIFY_DATABASE_SETTING; + command->settings_changes = std::move(set); + + auto command_list = std::make_shared(); + command_list->children.push_back(command); + + auto query = std::make_shared(); + auto * alter = query->as(); + + alter->alter_object = ASTAlterQuery::AlterObjectType::DATABASE; + alter->database = database_name; + alter->set(alter->command_list, command_list); + + return query; +} + + void DatabaseMaterializedPostgreSQL::attachTable(const String & table_name, const StoragePtr & table, const String & relative_table_path) { if (CurrentThread::isInitialized() && CurrentThread::get().getQueryContext()) { - auto set = std::make_shared(); - set->is_standalone = false; - auto tables_to_replicate = settings->materialized_postgresql_tables_list.value; if (tables_to_replicate.empty()) tables_to_replicate = getTablesList(); /// tables_to_replicate can be empty if postgres database had no tables when this database was created. - set->changes = {SettingChange("materialized_postgresql_tables_list", - tables_to_replicate.empty() ? table_name : (tables_to_replicate + "," + table_name))}; - - auto command = std::make_shared(); - command->type = ASTAlterCommand::Type::MODIFY_DATABASE_SETTING; - command->settings_changes = std::move(set); - - auto command_list = std::make_shared(); - command_list->children.push_back(command); - - auto query = std::make_shared(); - auto * alter = query->as(); - - alter->alter_object = ASTAlterQuery::AlterObjectType::DATABASE; - alter->database = database_name; - alter->set(alter->command_list, command_list); + SettingChange new_setting("materialized_postgresql_tables_list", tables_to_replicate.empty() ? table_name : (tables_to_replicate + "," + table_name)); + auto alter_query = createAlterSettingsQuery(new_setting); auto current_context = Context::createCopy(getContext()->getGlobalContext()); current_context->setInternalQuery(true); - InterpreterAlterQuery(query, current_context).execute(); + InterpreterAlterQuery(alter_query, current_context).execute(); auto storage = StorageMaterializedPostgreSQL::create(table, getContext(), remote_database_name, table_name); materialized_tables[table_name] = storage; @@ -287,6 +299,54 @@ void DatabaseMaterializedPostgreSQL::attachTable(const String & table_name, cons } +StoragePtr DatabaseMaterializedPostgreSQL::detachTable(const String & table_name) +{ + if (CurrentThread::isInitialized() && CurrentThread::get().getQueryContext()) + { + auto & table_to_delete = materialized_tables[table_name]; + if (!table_to_delete) + throw Exception(ErrorCodes::UNKNOWN_TABLE, "Materialized table `{}` does not exist", table_name); + + auto tables_to_replicate = getTablesList(table_name); + + /// tables_to_replicate can be empty if postgres database had no tables when this database was created. 
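// ---- Editor's note (added commentary, not part of the patch) ----
// As I read the hunk above, the AST produced by createAlterSettingsQuery() is the
// internal equivalent of
//     ALTER DATABASE <database_name> MODIFY SETTING materialized_postgresql_tables_list = '<new list>'
// (the same statement the integration test issues by hand). Executing it through
// InterpreterAlterQuery with the internal-query flag set keeps the persisted database
// metadata in sync with the set of attached tables, while the same statement coming
// from a user is rejected by applySettings().
// ---- end editor's note ----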
+ SettingChange new_setting("materialized_postgresql_tables_list", tables_to_replicate); + auto alter_query = createAlterSettingsQuery(new_setting); + + { + auto current_context = Context::createCopy(getContext()->getGlobalContext()); + current_context->setInternalQuery(true); + InterpreterAlterQuery(alter_query, current_context).execute(); + } + + auto nested = table_to_delete->as()->getNested(); + if (!nested) + throw Exception(ErrorCodes::UNKNOWN_TABLE, "Inner table `{}` does not exist", table_name); + + replication_handler->removeTableFromReplication(table_name); + + try + { + auto current_context = Context::createCopy(getContext()->getGlobalContext()); + current_context->makeQueryContext(); + DatabaseAtomic::dropTable(current_context, table_name, true); + } + catch (Exception & e) + { + e.addMessage("while removing table `" + table_name + "` from replication"); + throw; + } + + materialized_tables.erase(table_name); + return nullptr; + } + else + { + return DatabaseAtomic::detachTable(table_name); + } +} + + void DatabaseMaterializedPostgreSQL::shutdown() { stopReplication(); diff --git a/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.h b/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.h index 8b47b45bd75..40abb24cccf 100644 --- a/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.h +++ b/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.h @@ -50,9 +50,11 @@ public: StoragePtr tryGetTable(const String & name, ContextPtr context) const override; - void createTable(ContextPtr context, const String & name, const StoragePtr & table, const ASTPtr & query) override; + void createTable(ContextPtr context, const String & table_name, const StoragePtr & table, const ASTPtr & query) override; - void attachTable(const String & name, const StoragePtr & table, const String & relative_table_path) override; + void attachTable(const String & table_name, const StoragePtr & table, const String & relative_table_path) override; + + StoragePtr detachTable(const String & table_name) override; void dropTable(ContextPtr local_context, const String & name, bool no_delay) override; @@ -74,7 +76,9 @@ protected: private: void startSynchronization(); - String getTablesList() const; + ASTPtr createAlterSettingsQuery(const SettingChange & change); + + String getTablesList(const String & except = {}) const; bool is_attach; String remote_database_name; diff --git a/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp b/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp index 62ed7953c72..131ed066b66 100644 --- a/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp +++ b/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp @@ -528,6 +528,9 @@ String MaterializedPostgreSQLConsumer::advanceLSN(std::shared_ptr waiting_list; + + /// Since replication may be some time behind, we need to ensure that replication messages for deleted tables are ignored. 
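// ---- Editor's note (added commentary, not part of the patch) ----
// Sketch of the intended use of deleted_tables (hypothetical, not the patch's code):
// replication messages are dispatched per table name, so a guard such as
//     if (deleted_tables.contains(table_name)) return; /// drop the message
// on the consume path is enough to ignore changes for tables that were detached
// while the stream was lagging behind.
// ---- end editor's note ----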
+    std::unordered_set<String> deleted_tables;
 };

 }
diff --git a/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp b/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp
index dc297c9d059..06ad79c5750 100644
--- a/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp
+++ b/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp
@@ -659,6 +659,33 @@ void PostgreSQLReplicationHandler::addTableToReplication(StorageMaterializedPost
 }

+void PostgreSQLReplicationHandler::removeTableFromReplication(const String & postgres_table_name)
+{
+    consumer_task->deactivate();
+    try
+    {
+        postgres::Connection replication_connection(connection_info, /* replication */true);
+
+        {
+            pqxx::nontransaction tx(replication_connection.getRef());
+            removeTableFromPublication(tx, postgres_table_name);
+        }
+
+        /// Unregister the table from the consumer, so that replication messages for it are ignored from now on.
+        consumer->removeNested(postgres_table_name);
+    }
+    catch (...)
+    {
+        consumer_task->scheduleAfter(RESCHEDULE_MS);
+
+        auto error_message = getCurrentExceptionMessage(false);
+        throw Exception(ErrorCodes::POSTGRESQL_REPLICATION_INTERNAL_ERROR,
+                        "Failed to remove table `{}` from replication. Info: {}", postgres_table_name, error_message);
+    }
+    consumer_task->schedule();
+}
+
+
 void PostgreSQLReplicationHandler::reloadFromSnapshot(const std::vector<std::pair<Int32, String>> & relation_data)
 {
     /// If table schema has changed, the table stops consuming changes from replication stream.
diff --git a/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.h b/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.h
index a0c78b5d425..047802d10e1 100644
--- a/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.h
+++ b/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.h
@@ -48,6 +48,8 @@ public:
     void addTableToReplication(StorageMaterializedPostgreSQL * storage, const String & postgres_table_name);

+    void removeTableFromReplication(const String & postgres_table_name);
+
 private:
     using MaterializedStorages = std::unordered_map<String, StorageMaterializedPostgreSQL *>;

diff --git a/tests/integration/test_postgresql_replica_database_engine/test.py b/tests/integration/test_postgresql_replica_database_engine/test.py
index 4d407579b52..0ccc6ad7f2d 100644
--- a/tests/integration/test_postgresql_replica_database_engine/test.py
+++ b/tests/integration/test_postgresql_replica_database_engine/test.py
@@ -1005,6 +1005,65 @@ def test_add_new_table_to_replication(started_cluster):
         cursor.execute('drop table if exists postgresql_replica_{};'.format(i))

+def test_remove_table_from_replication(started_cluster):
+    drop_materialized_db()
+    conn = get_postgres_conn(ip=started_cluster.postgres_ip,
+                             port=started_cluster.postgres_port,
+                             database=True)
+    cursor = conn.cursor()
+    NUM_TABLES = 5
+
+    for i in range(NUM_TABLES):
+        create_postgres_table(cursor, 'postgresql_replica_{}'.format(i));
+        instance.query("INSERT INTO postgres_database.postgresql_replica_{} SELECT number, {} from numbers(10000)".format(i, i))
+
+    create_materialized_db(ip=started_cluster.postgres_ip,
+                           port=started_cluster.postgres_port)
+
+    for i in range(NUM_TABLES):
+        table_name = 'postgresql_replica_{}'.format(i)
+        check_tables_are_synchronized(table_name);
+
+    result = instance.query("SHOW TABLES FROM test_database")
+    assert(result == "postgresql_replica_0\npostgresql_replica_1\npostgresql_replica_2\npostgresql_replica_3\npostgresql_replica_4\n")
+
+    result = instance.query('SHOW CREATE DATABASE test_database')
+    assert(result[:63] == "CREATE DATABASE test_database\\nENGINE = MaterializedPostgreSQL(")
+
assert(result[-59:] == "\\'postgres_database\\', \\'postgres\\', \\'mysecretpassword\\')\n") + + table_name = 'postgresql_replica_4' + instance.query('DETACH TABLE test_database.{}'.format(table_name)); + result = instance.query_and_get_error('SELECT * FROM test_database.{}'.format(table_name)) + assert("doesn't exist" in result) + + result = instance.query("SHOW TABLES FROM test_database") + assert(result == "postgresql_replica_0\npostgresql_replica_1\npostgresql_replica_2\npostgresql_replica_3\n") + + result = instance.query('SHOW CREATE DATABASE test_database') + assert(result[:63] == "CREATE DATABASE test_database\\nENGINE = MaterializedPostgreSQL(") + assert(result[-138:] == ")\\nSETTINGS materialized_postgresql_tables_list = \\'postgresql_replica_0,postgresql_replica_1,postgresql_replica_2,postgresql_replica_3\\'\n") + + instance.query('ATTACH TABLE test_database.{}'.format(table_name)); + check_tables_are_synchronized(table_name); + + for i in range(NUM_TABLES): + table_name = 'postgresql_replica_{}'.format(i) + check_tables_are_synchronized(table_name); + + result = instance.query('SHOW CREATE DATABASE test_database') + assert(result[:63] == "CREATE DATABASE test_database\\nENGINE = MaterializedPostgreSQL(") + assert(result[-159:] == ")\\nSETTINGS materialized_postgresql_tables_list = \\'postgresql_replica_0,postgresql_replica_1,postgresql_replica_2,postgresql_replica_3,postgresql_replica_4\\'\n") + + table_name = 'postgresql_replica_1' + instance.query('DETACH TABLE test_database.{}'.format(table_name)); + result = instance.query('SHOW CREATE DATABASE test_database') + assert(result[:63] == "CREATE DATABASE test_database\\nENGINE = MaterializedPostgreSQL(") + assert(result[-138:] == ")\\nSETTINGS materialized_postgresql_tables_list = \\'postgresql_replica_0,postgresql_replica_2,postgresql_replica_3,postgresql_replica_4\\'\n") + + for i in range(NUM_TABLES): + cursor.execute('drop table if exists postgresql_replica_{};'.format(i)) + + if __name__ == '__main__': cluster.start() input("Cluster created, press any key to destroy...") From fd621381c747133cf0f778142dca8f7961117019 Mon Sep 17 00:00:00 2001 From: kssenii Date: Sat, 28 Aug 2021 16:56:39 +0300 Subject: [PATCH 09/80] Allow modify some other settings --- .../PostgreSQL/DatabaseMaterializedPostgreSQL.cpp | 6 ++---- .../PostgreSQL/MaterializedPostgreSQLConsumer.cpp | 10 ++++++++++ .../PostgreSQL/MaterializedPostgreSQLConsumer.h | 5 ++++- .../PostgreSQL/PostgreSQLReplicationHandler.cpp | 8 ++++++++ src/Storages/PostgreSQL/PostgreSQLReplicationHandler.h | 3 +++ 5 files changed, 27 insertions(+), 5 deletions(-) diff --git a/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.cpp b/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.cpp index 7d378355d64..bcd80fe62e7 100644 --- a/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.cpp +++ b/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.cpp @@ -151,11 +151,9 @@ void DatabaseMaterializedPostgreSQL::applySettings(const SettingsChanges & setti if (!local_context->isInternalQuery()) throw Exception(ErrorCodes::QUERY_NOT_ALLOWED, "Changing setting `{}` is not allowed", change.name); } - else if (change.name == "materialized_postgresql_allow_automatic_update") - { - } - else if (change.name == "materialized_postgresql_max_block_size") + else if ((change.name == "materialized_postgresql_allow_automatic_update") || (change.name == "materialized_postgresql_max_block_size")) { + replication_handler->setSetting(change); } else { diff --git 
a/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp b/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp
index 131ed066b66..c992699a206 100644
--- a/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp
+++ b/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp
@@ -8,6 +8,7 @@
 #include
 #include
 #include
+#include <Common/SettingsChanges.h>

 namespace DB

@@ -634,6 +635,15 @@ void MaterializedPostgreSQLConsumer::removeNested(const String & postgres_table_
 }

+void MaterializedPostgreSQLConsumer::setSetting(const SettingChange & setting)
+{
+    if (setting.name == "materialized_postgresql_max_block_size")
+        max_block_size = setting.value.safeGet<UInt64>();
+    else if (setting.name == "materialized_postgresql_allow_automatic_update")
+        allow_automatic_update = setting.value.safeGet<bool>();
+}
+
+
 /// Read binary changes from replication slot via COPY command (starting from current lsn in a slot).
 bool MaterializedPostgreSQLConsumer::readFromReplicationSlot()
 {
diff --git a/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.h b/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.h
index a7ea337f056..88f4ff4b4da 100644
--- a/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.h
+++ b/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.h
@@ -13,6 +13,7 @@
 namespace DB
 {
+struct SettingChange;

 class MaterializedPostgreSQLConsumer
 {
@@ -40,6 +41,8 @@ public:
     void removeNested(const String & postgres_table_name);

+    void setSetting(const SettingChange & setting);
+
 private:
     /// Read approximarely up to max_block_size changes from WAL.
     bool readFromReplicationSlot();
@@ -110,7 +113,7 @@ private:
     /// current_lsn converted from String to Int64 via getLSNValue().
     UInt64 lsn_value;

-    const size_t max_block_size;
+    size_t max_block_size;
     bool allow_automatic_update;

     String table_to_insert;
diff --git a/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp b/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp
index 06ad79c5750..8974ad93149 100644
--- a/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp
+++ b/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp
@@ -451,6 +451,14 @@ void PostgreSQLReplicationHandler::removeTableFromPublication(pqxx::nontransacti
 }

+void PostgreSQLReplicationHandler::setSetting(const SettingChange & setting)
+{
+    consumer_task->deactivate();
+    consumer->setSetting(setting);
+    consumer_task->schedule();
+}
+
+
 void PostgreSQLReplicationHandler::shutdownFinal()
 {
     try
diff --git a/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.h b/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.h
index 047802d10e1..18991b0d561 100644
--- a/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.h
+++ b/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.h
@@ -10,6 +10,7 @@
 namespace DB
 {
 class StorageMaterializedPostgreSQL;
+struct SettingChange;

 class PostgreSQLReplicationHandler
 {
@@ -50,6 +51,8 @@ public:
     void removeTableFromReplication(const String & postgres_table_name);

+    void setSetting(const SettingChange & setting);
+
 private:
     using MaterializedStorages = std::unordered_map<String, StorageMaterializedPostgreSQL *>;

From cc90d787f8e91751d61c1227a6f298001d8e775a Mon Sep 17 00:00:00 2001
From: kssenii
Date: Sat, 28 Aug 2021 17:30:22 +0300
Subject: [PATCH 10/80] Slightly better

---
 .../materialized-postgresql.md | 22 ++++++++++++++++++-
 src/Databases/DatabaseAtomic.cpp | 19 ++++++-------
 2 files changed, 28 insertions(+), 13 deletions(-)

diff --git a/docs/en/engines/database-engines/materialized-postgresql.md b/docs/en/engines/database-engines/materialized-postgresql.md
index 89c7c803bb3..986396019da
100644
--- a/docs/en/engines/database-engines/materialized-postgresql.md
+++ b/docs/en/engines/database-engines/materialized-postgresql.md
@@ -23,6 +23,20 @@ ENGINE = MaterializedPostgreSQL('host:port', ['database' | database], 'user', 'p
 - `user` — PostgreSQL user.
 - `password` — User password.

+## Dynamicaly adding new tables to replication
+
+``` sql
+ATTACH TABLE postgres_database.new_table;
+```
+
+It will work as well if there is a setting `materialized_postgresql_tables_list`.
+
+## Dynamicaly removing tables from replication
+
+``` sql
+DETACH TABLE postgres_database.table_to_remove;
+```
+
 ## Settings {#settings}

 - [materialized_postgresql_max_block_size](../../operations/settings/settings.md#materialized-postgresql-max-block-size)
@@ -40,6 +54,12 @@ SETTINGS materialized_postgresql_max_block_size = 65536,
 SELECT * FROM database1.table1;
 ```

+It is also possible to change settings at run time.
+
+``` sql
+ALTER DATABASE postgres_database MODIFY SETTING materialized_postgresql_max_block_size = <new_size>;
+```
+
 ## Requirements {#requirements}

 1. The [wal_level](https://www.postgresql.org/docs/current/runtime-config-wal.html) setting must have a value `logical` and `max_replication_slots` parameter must have a value at least `2` in the PostgreSQL config file.
@@ -73,7 +93,7 @@ WHERE oid = 'postgres_table'::regclass;

 !!! warning "Warning"
     Replication of [**TOAST**](https://www.postgresql.org/docs/9.5/storage-toast.html) values is not supported. The default value for the data type will be used.
- 
+
 ## Example of Use {#example-of-use}

 ``` sql
diff --git a/src/Databases/DatabaseAtomic.cpp b/src/Databases/DatabaseAtomic.cpp
index 5102c924140..e16db6477d8 100644
--- a/src/Databases/DatabaseAtomic.cpp
+++ b/src/Databases/DatabaseAtomic.cpp
@@ -572,14 +572,9 @@ void DatabaseAtomic::modifySettings(const SettingsChanges & settings_changes, Co
 {
     applySettings(settings_changes, local_context);

-    ASTCreateQuery create;
-    create.attach = true;
-    create.database = "_";
-    create.uuid = getUUID();
-    create.if_not_exists = false;
-    create.storage = assert_cast<ASTStorage *>(storage_def.get());
-    auto * settings = create.storage->settings;
-
+    auto create_query = getCreateDatabaseQuery()->clone();
+    auto create = create_query->as<ASTCreateQuery>();
+    auto * settings = create->storage->settings;
     if (settings)
     {
         auto & previous_settings = settings->changes;
@@ -598,14 +593,14 @@ void DatabaseAtomic::modifySettings(const SettingsChanges & settings_changes, Co
         auto settings = std::make_shared<ASTSetQuery>();
         settings->is_standalone = false;
         settings->changes = settings_changes;
-        create.storage->set(create.storage->settings, settings->clone());
+        create->storage->set(create->storage->settings, settings->clone());
     }

-    create.attach = true;
-    create.if_not_exists = false;
+    create->attach = true;
+    create->if_not_exists = false;

     WriteBufferFromOwnString statement_buf;
-    formatAST(create, statement_buf, false);
+    formatAST(*create, statement_buf, false);
     writeChar('\n', statement_buf);
     String statement = statement_buf.str();

From 378e1f74aa2bdbaf094e691540db17288cfc051e Mon Sep 17 00:00:00 2001
From: kssenii
Date: Sat, 28 Aug 2021 19:09:35 +0300
Subject: [PATCH 11/80] Fix tests

---
 .../en/engines/database-engines/materialized-postgresql.md | 4 ++--
 src/Access/AccessType.h | 3 ++-
 .../PostgreSQL/DatabaseMaterializedPostgreSQL.cpp | 1 +
 src/Interpreters/InterpreterAlterQuery.cpp | 2 +-
 src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp | 7 ++++---
 tests/queries/0_stateless/01271_show_privileges.reference | 2 ++
 6 files changed, 12 insertions(+), 7
deletions(-) diff --git a/docs/en/engines/database-engines/materialized-postgresql.md b/docs/en/engines/database-engines/materialized-postgresql.md index 986396019da..2272246a7c5 100644 --- a/docs/en/engines/database-engines/materialized-postgresql.md +++ b/docs/en/engines/database-engines/materialized-postgresql.md @@ -23,7 +23,7 @@ ENGINE = MaterializedPostgreSQL('host:port', ['database' | database], 'user', 'p - `user` — PostgreSQL user. - `password` — User password. -## Dynamicaly adding new tables to replication +## Dynamically adding new tables to replication ``` sql ATTACH TABLE postgres_database.new_table; @@ -31,7 +31,7 @@ ATTACH TABLE postgres_database.new_table; It will work as well if there is a setting `materialized_postgresql_tables_list`. -## Dynamicaly removing tables from replication +## Dynamically removing tables from replication ``` sql DETACH TABLE postgres_database.table_to_remove; diff --git a/src/Access/AccessType.h b/src/Access/AccessType.h index 57342ee5503..823ecebe12d 100644 --- a/src/Access/AccessType.h +++ b/src/Access/AccessType.h @@ -71,9 +71,10 @@ enum class AccessType M(ALTER_FETCH_PARTITION, "ALTER FETCH PART, FETCH PARTITION", TABLE, ALTER_TABLE) \ M(ALTER_FREEZE_PARTITION, "FREEZE PARTITION, UNFREEZE", TABLE, ALTER_TABLE) \ \ - M(ALTER_DATABASE_SETTINGS, "ALTER DATABASE SETTING, ALTER MODIFY DATABASE SETTING, MODIFY DATABASE SETTING", TABLE, ALTER_TABLE) /* allows to execute ALTER MODIFY SETTING */\ + M(ALTER_DATABASE_SETTINGS, "ALTER DATABASE SETTING, ALTER MODIFY DATABASE SETTING, MODIFY DATABASE SETTING", DATABASE, ALTER_DATABASE) /* allows to execute ALTER MODIFY SETTING */\ \ M(ALTER_TABLE, "", GROUP, ALTER) \ + M(ALTER_DATABASE, "", GROUP, ALTER) \ \ M(ALTER_VIEW_REFRESH, "ALTER LIVE VIEW REFRESH, REFRESH VIEW", VIEW, ALTER_VIEW) \ M(ALTER_VIEW_MODIFY_QUERY, "ALTER TABLE MODIFY QUERY", VIEW, ALTER_VIEW) \ diff --git a/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.cpp b/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.cpp index bcd80fe62e7..6d4ac96e56a 100644 --- a/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.cpp +++ b/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.cpp @@ -34,6 +34,7 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; extern const int QUERY_NOT_ALLOWED; extern const int UNKNOWN_TABLE; + extern const int BAD_ARGUMENTS; } DatabaseMaterializedPostgreSQL::DatabaseMaterializedPostgreSQL( diff --git a/src/Interpreters/InterpreterAlterQuery.cpp b/src/Interpreters/InterpreterAlterQuery.cpp index 0a40275b4bb..9cc4e02c4c4 100644 --- a/src/Interpreters/InterpreterAlterQuery.cpp +++ b/src/Interpreters/InterpreterAlterQuery.cpp @@ -48,7 +48,7 @@ BlockIO InterpreterAlterQuery::execute() else if (alter.alter_object == ASTAlterQuery::AlterObjectType::TABLE || alter.alter_object == ASTAlterQuery::AlterObjectType::LIVE_VIEW) return executeToTable(alter); - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown alter object type"); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown alter object type"); } diff --git a/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp b/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp index 8974ad93149..24fafff4123 100644 --- a/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp +++ b/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp @@ -166,7 +166,8 @@ void PostgreSQLReplicationHandler::startSynchronization(bool throw_on_error) auto * materialized_storage = storage->as (); try { - /// TODO: THIS IS INCORRENT, we might get here if there is no nested, 
need to check and reload. + /// FIXME: Looks like it is possible we might get here if there is no nested storage or at least nested storage id field might be empty. + /// Caught it somehow when doing something else incorrectly, but do not see any reason how it could happen. /// Try load nested table, set materialized table metadata. nested_storages[table_name] = materialized_storage->prepare(); } @@ -617,7 +618,7 @@ PostgreSQLTableStructurePtr PostgreSQLReplicationHandler::fetchTableStructure( void PostgreSQLReplicationHandler::addTableToReplication(StorageMaterializedPostgreSQL * materialized_storage, const String & postgres_table_name) { /// Note: we have to ensure that replication consumer task is stopped when we reload table, because otherwise - /// it can read wal beyond start lsn position (from which this table is being loaded), which will result in loosing data. + /// it can read wal beyond start lsn position (from which this table is being loaded), which will result in losing data. consumer_task->deactivate(); try { @@ -663,7 +664,7 @@ void PostgreSQLReplicationHandler::addTableToReplication(StorageMaterializedPost throw Exception(ErrorCodes::POSTGRESQL_REPLICATION_INTERNAL_ERROR, "Failed to add table `{}` to replication. Info: {}", postgres_table_name, error_message); } - consumer_task->schedule(); + consumer_task->activateAndSchedule(); } diff --git a/tests/queries/0_stateless/01271_show_privileges.reference b/tests/queries/0_stateless/01271_show_privileges.reference index 46eb3bf9ba8..07f670e9afb 100644 --- a/tests/queries/0_stateless/01271_show_privileges.reference +++ b/tests/queries/0_stateless/01271_show_privileges.reference @@ -35,7 +35,9 @@ ALTER SETTINGS ['ALTER SETTING','ALTER MODIFY SETTING','MODIFY SETTING','RESET S ALTER MOVE PARTITION ['ALTER MOVE PART','MOVE PARTITION','MOVE PART'] TABLE ALTER TABLE ALTER FETCH PARTITION ['ALTER FETCH PART','FETCH PARTITION'] TABLE ALTER TABLE ALTER FREEZE PARTITION ['FREEZE PARTITION','UNFREEZE'] TABLE ALTER TABLE +ALTER DATABASE SETTINGS ['ALTER DATABASE SETTING','ALTER MODIFY DATABASE SETTING','MODIFY DATABASE SETTING'] DATABASE ALTER DATABASE ALTER TABLE [] \N ALTER +ALTER DATABASE [] \N ALTER ALTER VIEW REFRESH ['ALTER LIVE VIEW REFRESH','REFRESH VIEW'] VIEW ALTER VIEW ALTER VIEW MODIFY QUERY ['ALTER TABLE MODIFY QUERY'] VIEW ALTER VIEW ALTER VIEW [] \N ALTER From 9d0444774aa05c8c6bffb7eadaf03d1f231f652f Mon Sep 17 00:00:00 2001 From: kssenii Date: Sun, 29 Aug 2021 11:50:03 +0300 Subject: [PATCH 12/80] Fix tests --- src/Databases/DatabaseAtomic.cpp | 6 ++---- src/Databases/DatabaseAtomic.h | 2 +- src/Databases/DatabaseFactory.cpp | 2 +- src/Databases/IDatabase.h | 4 ++-- src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.cpp | 5 +++-- src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.h | 4 ++-- src/Storages/AlterCommands.cpp | 5 ++++- src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp | 2 +- 8 files changed, 16 insertions(+), 14 deletions(-) diff --git a/src/Databases/DatabaseAtomic.cpp b/src/Databases/DatabaseAtomic.cpp index e16db6477d8..cc350e58dcf 100644 --- a/src/Databases/DatabaseAtomic.cpp +++ b/src/Databases/DatabaseAtomic.cpp @@ -568,12 +568,10 @@ void DatabaseAtomic::checkDetachedTableNotInUse(const UUID & uuid) assertDetachedTableNotInUse(uuid); } -void DatabaseAtomic::modifySettings(const SettingsChanges & settings_changes, ContextPtr local_context) +void DatabaseAtomic::modifySettingsMetadata(const SettingsChanges & settings_changes, ContextPtr local_context) { - applySettings(settings_changes, 
local_context);
-
     auto create_query = getCreateDatabaseQuery()->clone();
-    auto create = create_query->as<ASTCreateQuery>();
+    auto * create = create_query->as<ASTCreateQuery>();
     auto * settings = create->storage->settings;
     if (settings)
     {
diff --git a/src/Databases/DatabaseAtomic.h b/src/Databases/DatabaseAtomic.h
index 0b00a4eb43a..b0910f29ab1 100644
--- a/src/Databases/DatabaseAtomic.h
+++ b/src/Databases/DatabaseAtomic.h
@@ -61,7 +61,7 @@ public:
     void checkDetachedTableNotInUse(const UUID & uuid) override;
     void setDetachedTableNotInUseForce(const UUID & uuid);

-    void modifySettings(const SettingsChanges & settings_changes, ContextPtr local_context) override;
+    void modifySettingsMetadata(const SettingsChanges & settings_changes, ContextPtr local_context) override;

 protected:
     void commitAlterTable(const StorageID & table_id, const String & table_metadata_tmp_path, const String & table_metadata_path, const String & statement, ContextPtr query_context) override;
diff --git a/src/Databases/DatabaseFactory.cpp b/src/Databases/DatabaseFactory.cpp
index cff71a0e7fd..962177f6f49 100644
--- a/src/Databases/DatabaseFactory.cpp
+++ b/src/Databases/DatabaseFactory.cpp
@@ -105,7 +105,7 @@ DatabasePtr DatabaseFactory::getImpl(const ASTCreateQuery & create, const String
     static const std::unordered_set<std::string_view> database_engines{"Ordinary", "Atomic", "Memory",
         "Dictionary", "Lazy", "Replicated", "MySQL", "MaterializeMySQL", "MaterializedMySQL",
-        "Lazy", "Replicated", "PostgreSQL", "MaterializedPostgreSQL", "SQLite"};
+        "PostgreSQL", "MaterializedPostgreSQL", "SQLite"};

     if (!database_engines.contains(engine_name))
         throw Exception(ErrorCodes::BAD_ARGUMENTS, "Database engine name `{}` does not exist", engine_name);
diff --git a/src/Databases/IDatabase.h b/src/Databases/IDatabase.h
index f3f801e620b..d4e5903ad8c 100644
--- a/src/Databases/IDatabase.h
+++ b/src/Databases/IDatabase.h
@@ -279,12 +279,12 @@ public:
         throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Alter is not supported by database engine {}", getEngineName());
     }

-    virtual void modifySettings(const SettingsChanges &, ContextPtr)
+    virtual void modifySettingsMetadata(const SettingsChanges &, ContextPtr)
     {
         throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Database engine {} does not support settings", getEngineName());
     }

-    virtual void applySettings(const SettingsChanges &, ContextPtr)
+    virtual void tryApplySettings(const SettingsChanges &, ContextPtr)
     {
         throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Database engine {} does not support settings", getEngineName());
     }
diff --git a/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.cpp b/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.cpp
index 6d4ac96e56a..f720154c1ad 100644
--- a/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.cpp
+++ b/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.cpp
@@ -140,7 +140,7 @@ void DatabaseMaterializedPostgreSQL::checkAlterIsPossible(const AlterCommands &
 }

-void DatabaseMaterializedPostgreSQL::applySettings(const SettingsChanges & settings_changes, ContextPtr local_context)
+void DatabaseMaterializedPostgreSQL::tryApplySettings(const SettingsChanges & settings_changes, ContextPtr local_context)
 {
     for (const auto & change : settings_changes)
     {
@@ -208,7 +208,7 @@ void DatabaseMaterializedPostgreSQL::createTable(ContextPtr local_context, const
     /// Create ReplacingMergeTree table.
     auto query_copy = query->clone();
-    auto create_query = assert_cast<ASTCreateQuery *>(query_copy.get());
+    auto * create_query = assert_cast<ASTCreateQuery *>(query_copy.get());
     create_query->attach = false;
     create_query->attach_short_syntax = false;
     DatabaseAtomic::createTable(StorageMaterializedPostgreSQL::makeNestedTableContext(local_context), table_name, table, query_copy);
@@ -273,6 +273,7 @@ ASTPtr DatabaseMaterializedPostgreSQL::createAlterSettingsQuery(const SettingCha
 void DatabaseMaterializedPostgreSQL::attachTable(const String & table_name, const StoragePtr & table, const String & relative_table_path)
 {
+    /// TODO: If attach fails, need to delete nested...
     if (CurrentThread::isInitialized() && CurrentThread::get().getQueryContext())
     {
         auto tables_to_replicate = settings->materialized_postgresql_tables_list.value;
diff --git a/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.h b/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.h
index 40abb24cccf..c33b6274e7c 100644
--- a/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.h
+++ b/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.h
@@ -64,7 +64,7 @@ public:
     void checkAlterIsPossible(const AlterCommands & commands, ContextPtr context) const override;

-    void applySettings(const SettingsChanges & settings_changes, ContextPtr context) override;
+    void tryApplySettings(const SettingsChanges & settings_changes, ContextPtr context) override;

     void shutdown() override;

@@ -76,7 +76,9 @@ protected:
 private:
     void startSynchronization();

-    ASTPtr createAlterSettingsQuery(const SettingChange & change);
+    ASTPtr createAlterSettingsQuery(const SettingChange & new_setting);

     String getTablesList(const String & except = {}) const;

diff --git a/src/Storages/AlterCommands.cpp b/src/Storages/AlterCommands.cpp
index de9d9e594fd..6ee3db90724 100644
--- a/src/Storages/AlterCommands.cpp
+++ b/src/Storages/AlterCommands.cpp
@@ -365,7 +365,10 @@ void AlterCommands::apply(DatabasePtr database, ContextPtr context) const
         if (!command.ignore)
         {
             if (command.type == AlterCommand::MODIFY_DATABASE_SETTING)
-                database->modifySettings(command.settings_changes, context);
+            {
+                database->tryApplySettings(command.settings_changes, context);
+                database->modifySettingsMetadata(command.settings_changes, context);
+            }
             else
                 throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unsupported alter command");
         }
diff --git a/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp b/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp
index 1bf3c5ce1ca..e38ea1e0958 100644
--- a/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp
+++ b/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp
@@ -51,7 +51,7 @@ StorageMaterializedPostgreSQL::StorageMaterializedPostgreSQL(
     std::unique_ptr<MaterializedPostgreSQLSettings> replication_settings)
     : IStorage(table_id_)
     , WithContext(context_->getGlobalContext())
-    , log(&Poco::Logger::get("StorageMaterializedPostgreSQL(" + postgres::formatNameForLogs(remote_database_name, remote_table_name) + ")"))
+    , log(&Poco::Logger::get("StorageMaterializedPostgreSQL(" + postgres::formatNameForLogs(remote_database_name, remote_table_name_) + ")"))
     , is_materialized_postgresql_database(false)
     , has_nested(false)
     , nested_context(makeNestedTableContext(context_->getGlobalContext()))

From 76a4de2a31e2da8ee35eb67fc3676c9fad6716a0 Mon Sep 17 00:00:00 2001
From: hexiaoting
Date: Mon, 30 Aug 2021 11:02:02 +0800
Subject: [PATCH 13/80] Fix bug

---
 src/Interpreters/TreeOptimizer.cpp | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/src/Interpreters/TreeOptimizer.cpp
b/src/Interpreters/TreeOptimizer.cpp
index ce9fb1b81f8..98c50e93c08 100644
--- a/src/Interpreters/TreeOptimizer.cpp
+++ b/src/Interpreters/TreeOptimizer.cpp
@@ -593,23 +593,24 @@ void optimizeFunctionsToSubcolumns(ASTPtr & query, const StorageMetadataPtr & me
 /// rewrite to : SELECT quantiles(0.5, 0.9, 0.95)(x)[1], quantiles(0.5, 0.9, 0.95)(x)[2], quantiles(0.5, 0.9, 0.95)(x)[3] FROM ...
 void fuseCandidate(std::unordered_map & fuse_quantile)
 {
-    for (auto & candidate : fuse_quantile)
+    for (auto candidate : fuse_quantile)
     {
         String func_name = candidate.first;
-        GatherFunctionQuantileData::FuseQuantileAggregatesData & args_to_functions = candidate.second;
+        GatherFunctionQuantileData::FuseQuantileAggregatesData args_to_functions = candidate.second;

         // Try to fuse multiply `quantile*` Function to plural
-        for (auto & it : args_to_functions.arg_map_function)
+        for (auto it : args_to_functions.arg_map_function)
         {
             std::vector<ASTPtr *> & functions = it.second;
             size_t count = functions.size();
             if (count > 1)
             {
                 auto param_exp_list = std::make_shared<ASTExpressionList>();
-                for (auto ast : functions)
+                for (auto * ast : functions)
                 {
                     const ASTs & parameters = (*ast)->as<ASTFunction>()->parameters->as<ASTExpressionList>().children;
-                    assert(parameters.size() == 1);
+                    if (parameters.size() > 1)
+                        throw Exception("Aggregate function " + func_name + " requires one parameter or less.", ErrorCodes::LOGICAL_ERROR);
                     param_exp_list->children.push_back(parameters[0]);
                 }
                 auto func_base = makeASTFunction(quantile_fuse_name_mapping.find(func_name)->second, (*functions[0])->as<ASTFunction>()->arguments->children);

From b387f05d9cfc5c78efb133dfe42b005a8942c56e Mon Sep 17 00:00:00 2001
From: Alexander Tokmakov
Date: Thu, 26 Aug 2021 16:19:52 +0300
Subject: [PATCH 14/80] resolve table dependencies on metadata loading

---
 src/Core/QualifiedTableName.h | 22 +-
 src/Databases/DDLDependencyVisitor.cpp | 84 ++++++++
 src/Databases/DDLDependencyVisitor.h | 35 ++++
 src/Databases/DatabaseOnDisk.cpp | 2 +-
 src/Databases/DatabaseOrdinary.cpp | 194 ++++++++++--------
 src/Databases/DatabaseOrdinary.h | 6 +
 src/Databases/IDatabase.h | 15 ++
 src/Databases/TablesLoader.cpp | 153 ++++++++++++++
 src/Databases/TablesLoader.h | 63 ++++++
 src/Interpreters/DatabaseCatalog.cpp | 9 -
 src/Interpreters/InterpreterCreateQuery.cpp | 10 +-
 src/Interpreters/InterpreterCreateQuery.h | 6 +
 src/Interpreters/loadMetadata.cpp | 9 +
 .../01160_table_dependencies.reference | 5 +
 .../0_stateless/01160_table_dependencies.sh | 43 ++++
 15 files changed, 555 insertions(+), 101 deletions(-)
 create mode 100644 src/Databases/DDLDependencyVisitor.cpp
 create mode 100644 src/Databases/DDLDependencyVisitor.h
 create mode 100644 src/Databases/TablesLoader.cpp
 create mode 100644 src/Databases/TablesLoader.h
 create mode 100644 tests/queries/0_stateless/01160_table_dependencies.reference
 create mode 100755 tests/queries/0_stateless/01160_table_dependencies.sh

diff --git a/src/Core/QualifiedTableName.h b/src/Core/QualifiedTableName.h
index 453d55d85c7..2b48d38ca2f 100644
--- a/src/Core/QualifiedTableName.h
+++ b/src/Core/QualifiedTableName.h
@@ -3,6 +3,8 @@
 #include
 #include
 #include
+#include <Common/quoteString.h>
+#include <fmt/core.h>

 namespace DB
@@ -47,5 +49,23 @@ template <> struct hash
         return qualified_table.hash();
     }
 };
-
 }
+
+namespace fmt
+{
+    template <>
+    struct formatter<DB::QualifiedTableName>
+    {
+        constexpr auto parse(format_parse_context & ctx)
+        {
+            return ctx.begin();
+        }
+
+        template <typename FormatContext>
+        auto format(const DB::QualifiedTableName & name, FormatContext & ctx)
+        {
+            return format_to(ctx.out(), "{}.{}", DB::backQuoteIfNeed(name.database), DB::backQuoteIfNeed(name.table));
+        }
+    };
+}
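A usage sketch of the formatter specialization above (the sample names are hypothetical, and the expected output assumes `backQuoteIfNeed()` quotes only identifiers that are not plain words):

```cpp
#include <fmt/format.h>
#include <Core/QualifiedTableName.h>  // the patched header above

int main()
{
    /// Hypothetical names: backQuoteIfNeed() should leave "db" bare and
    /// back-quote "my-table", which contains a character that needs quoting.
    DB::QualifiedTableName name{"db", "my-table"};
    fmt::print("Loading {}\n", name);  /// expected: Loading db.`my-table`
}
```

This is what lets the later TablesLoader pass `QualifiedTableName` values straight into `LOG_WARNING` and `fmt::join` without manual formatting.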
diff --git a/src/Databases/DDLDependencyVisitor.cpp b/src/Databases/DDLDependencyVisitor.cpp
new file mode 100644
index 00000000000..7408e74f012
--- /dev/null
+++ b/src/Databases/DDLDependencyVisitor.cpp
@@ -0,0 +1,84 @@
+#include <Databases/DDLDependencyVisitor.h>
+#include <Parsers/ASTFunction.h>
+#include <Parsers/ASTLiteral.h>
+#include <Parsers/ASTIdentifier.h>
+#include <Parsers/ASTCreateQuery.h>
+
+namespace DB
+{
+
+void DDLDependencyVisitor::visit(const ASTPtr & ast, Data & data)
+{
+    if (const auto * function = ast->as<ASTFunction>())
+        visit(*function, data);
+}
+
+bool DDLDependencyVisitor::needChildVisit(const ASTPtr & node, const ASTPtr & /*child*/)
+{
+    return !node->as<ASTStorage>();
+}
+
+void DDLDependencyVisitor::visit(const ASTFunction & function, Data & data)
+{
+    if (function.name == "joinGet" ||
+        function.name == "dictHas" ||
+        function.name == "dictIsIn" ||
+        function.name.starts_with("dictGet"))
+    {
+        extractTableNameFromArgument(function, data, 0);
+    }
+}
+
+void DDLDependencyVisitor::extractTableNameFromArgument(const ASTFunction & function, Data & data, size_t arg_idx)
+{
+    /// Just ignore incorrect arguments, proper exception will be thrown later
+    if (!function.arguments || function.arguments->children.size() <= arg_idx)
+        return;
+
+    String database_name;
+    String table_name;
+
+    const auto * arg = function.arguments->as<ASTExpressionList>()->children[arg_idx].get();
+    if (const auto * literal = arg->as<ASTLiteral>())
+    {
+        if (literal->value.getType() != Field::Types::String)
+            return;
+
+        String maybe_qualified_name = literal->value.get<String>();
+        auto pos = maybe_qualified_name.find('.');
+        if (pos == 0 || pos == (maybe_qualified_name.size() - 1))
+        {
+            /// Most likely name is invalid
+            return;
+        }
+        else if (pos == std::string::npos)
+        {
+            table_name = std::move(maybe_qualified_name);
+        }
+        else
+        {
+            database_name = maybe_qualified_name.substr(0, pos);
+            table_name = maybe_qualified_name.substr(pos + 1);
+        }
+    }
+    else if (const auto * identifier = arg->as<ASTIdentifier>())
+    {
+        auto table_identifier = identifier->createTable();
+        if (!table_identifier)
+            return;
+
+        database_name = table_identifier->getDatabaseName();
+        table_name = table_identifier->shortName();
+    }
+    else
+    {
+        assert(false);
+        return;
+    }
+
+    if (database_name.empty())
+        database_name = data.default_database;
+    data.dependencies.push_back(QualifiedTableName{std::move(database_name), std::move(table_name)});
+}
+
+}
diff --git a/src/Databases/DDLDependencyVisitor.h b/src/Databases/DDLDependencyVisitor.h
new file mode 100644
index 00000000000..708e0bca66e
--- /dev/null
+++ b/src/Databases/DDLDependencyVisitor.h
@@ -0,0 +1,35 @@
+#pragma once
+#include <Core/QualifiedTableName.h>
+#include <Parsers/IAST_fwd.h>
+#include <Interpreters/InDepthNodeVisitor.h>
+
+namespace DB
+{
+
+class ASTFunction;
+
+
+class DDLDependencyVisitor
+{
+public:
+    struct Data
+    {
+        using TableDependencies = std::vector<QualifiedTableName>;
+        String default_database;
+        TableDependencies dependencies;
+    };
+
+    using Visitor = ConstInDepthNodeVisitor<DDLDependencyVisitor, true>;
+
+    static void visit(const ASTPtr & ast, Data & data);
+    static bool needChildVisit(const ASTPtr & node, const ASTPtr & child);
+
+private:
+    static void visit(const ASTFunction & function, Data & data);
+
+    static void extractTableNameFromArgument(const ASTFunction & function, Data & data, size_t arg_idx);
+};
+
+using TableLoadingDependenciesVisitor = DDLDependencyVisitor::Visitor;
+
+}
diff --git a/src/Databases/DatabaseOnDisk.cpp b/src/Databases/DatabaseOnDisk.cpp
index 620e560b64c..dad059a2008 100644
--- a/src/Databases/DatabaseOnDisk.cpp
+++ b/src/Databases/DatabaseOnDisk.cpp
@@ -608,7 +608,7 @@ void DatabaseOnDisk::iterateMetadataFiles(ContextPtr local_context, const Iterat
     }

     /// Read and parse metadata in parallel
-    ThreadPool pool;
+    ThreadPool pool{1};
     for (const
auto & file : metadata_files)
     {
         pool.scheduleOrThrowOnError([&]()
diff --git a/src/Databases/DatabaseOrdinary.cpp b/src/Databases/DatabaseOrdinary.cpp
index bfe5de4c95f..f82db868ac8 100644
--- a/src/Databases/DatabaseOrdinary.cpp
+++ b/src/Databases/DatabaseOrdinary.cpp
@@ -4,6 +4,8 @@
 #include
 #include
 #include
+#include <Databases/DDLDependencyVisitor.h>
+#include <Databases/TablesLoader.h>
 #include
 #include
 #include
@@ -27,8 +29,6 @@ namespace fs = std::filesystem;
 namespace DB
 {

-static constexpr size_t PRINT_MESSAGE_EACH_N_OBJECTS = 256;
-static constexpr size_t PRINT_MESSAGE_EACH_N_SECONDS = 5;
 static constexpr size_t METADATA_FILE_BUFFER_SIZE = 32768;

 namespace
@@ -60,15 +60,6 @@ namespace
             throw;
         }
     }
-
-    void logAboutProgress(Poco::Logger * log, size_t processed, size_t total, AtomicStopwatch & watch)
-    {
-        if (processed % PRINT_MESSAGE_EACH_N_OBJECTS == 0 || watch.compareAndRestart(PRINT_MESSAGE_EACH_N_SECONDS))
-        {
-            LOG_INFO(log, "{}%", processed * 100.0 / total);
-            watch.restart();
-        }
-    }
 }

@@ -90,14 +81,82 @@ void DatabaseOrdinary::loadStoredObjects(
      * Otherwise (for the ext4 filesystem), `DirectoryIterator` iterates through them in some order,
      * which does not correspond to order tables creation and does not correspond to order of their location on disk.
      */
-    using FileNames = std::map<String, ASTPtr>;
-    std::mutex file_names_mutex;
-    FileNames file_names;
-    size_t total_dictionaries = 0;
+    ParsedTablesMetadata metadata;
+    loadTablesMetadata(local_context, metadata);

-    auto process_metadata = [&file_names, &total_dictionaries, &file_names_mutex, this](
-                            const String & file_name)
+    size_t total_tables = metadata.metadata.size() - metadata.total_dictionaries;
+
+    AtomicStopwatch watch;
+    std::atomic<size_t> dictionaries_processed{0};
+    std::atomic<size_t> tables_processed{0};
+
+    ThreadPool pool;
+
+    /// We must attach dictionaries before attaching tables
+    /// because while we're attaching tables we may need to have some dictionaries attached
+    /// (for example, dictionaries can be used in the default expressions for some tables).
+    /// On the other hand we can attach any dictionary (even sourced from ClickHouse table)
+    /// without having any tables attached. It is so because attaching of a dictionary means
+    /// loading of its config only, it doesn't involve loading the dictionary itself.
+
+    /// Attach dictionaries.
+    for (const auto & name_with_path_and_query : metadata.metadata)
+    {
+        const auto & name = name_with_path_and_query.first;
+        const auto & path = name_with_path_and_query.second.first;
+        const auto & ast = name_with_path_and_query.second.second;
+        const auto & create_query = ast->as<const ASTCreateQuery &>();
+
+        if (create_query.is_dictionary)
+        {
+            pool.scheduleOrThrowOnError([&]()
+            {
+                loadTableFromMetadata(local_context, path, name, ast, has_force_restore_data_flag);
+
+                /// Messages, so that it's not boring to wait for the server to load for a long time.
+                logAboutProgress(log, ++dictionaries_processed, metadata.total_dictionaries, watch);
+            });
+        }
+    }
+
+    pool.wait();
+
+    /// Attach tables.
+    for (const auto & name_with_path_and_query : metadata.metadata)
+    {
+        const auto & name = name_with_path_and_query.first;
+        const auto & path = name_with_path_and_query.second.first;
+        const auto & ast = name_with_path_and_query.second.second;
+        const auto & create_query = ast->as<const ASTCreateQuery &>();
+
+        if (!create_query.is_dictionary)
+        {
+            pool.scheduleOrThrowOnError([&]()
+            {
+                loadTableFromMetadata(local_context, path, name, ast, has_force_restore_data_flag);
+
+                /// Messages, so that it's not boring to wait for the server to load for a long time.
+                logAboutProgress(log, ++tables_processed, total_tables, watch);
+            });
+        }
+    }
+
+    pool.wait();
+
+    if (!skip_startup_tables)
+    {
+        /// After all tables was basically initialized, startup them.
+        startupTablesImpl(pool);
+    }
+}
+
+void DatabaseOrdinary::loadTablesMetadata(ContextPtr local_context, ParsedTablesMetadata & metadata)
+{
+    size_t prev_tables_count = metadata.metadata.size();
+    size_t prev_total_dictionaries = metadata.total_dictionaries;
+
+    auto process_metadata = [&metadata, this](const String & file_name)
     {
         fs::path path(getMetadataPath());
         fs::path file_path(file_name);
@@ -122,9 +181,19 @@ void DatabaseOrdinary::loadStoredObjects(
                 return;
             }

-            std::lock_guard lock{file_names_mutex};
-            file_names[file_name] = ast;
-            total_dictionaries += create_query->is_dictionary;
+            TableLoadingDependenciesVisitor::Data data;
+            data.default_database = metadata.default_database;
+            TableLoadingDependenciesVisitor visitor{data};
+            visitor.visit(ast);
+            QualifiedTableName qualified_name{database_name, create_query->table};
+
+            std::lock_guard lock{metadata.mutex};
+            metadata.metadata[qualified_name] = std::make_pair(full_path.string(), std::move(ast));
+            if (data.dependencies.empty())
+                metadata.independent_tables.insert(std::move(qualified_name));
+            else
+                metadata.table_dependencies.insert({std::move(qualified_name), std::move(data.dependencies)});
+            metadata.total_dictionaries += create_query->is_dictionary;
         }
     }
     catch (Exception & e)
    {
@@ -136,77 +205,28 @@ void DatabaseOrdinary::loadStoredObjects(

     iterateMetadataFiles(local_context, process_metadata);

-    size_t total_tables = file_names.size() - total_dictionaries;
+    size_t objects_in_database = metadata.metadata.size() - prev_tables_count;
+    size_t dictionaries_in_database = metadata.total_dictionaries - prev_total_dictionaries;
+    size_t tables_in_database = objects_in_database - dictionaries_in_database;

-    LOG_INFO(log, "Total {} tables and {} dictionaries.", total_tables, total_dictionaries);
+    LOG_INFO(log, "Total {} tables and {} dictionaries.", tables_in_database, dictionaries_in_database);
+}

-    AtomicStopwatch watch;
-    std::atomic<size_t> tables_processed{0};
+void DatabaseOrdinary::loadTableFromMetadata(ContextMutablePtr local_context, const String & file_path, const QualifiedTableName & name, const ASTPtr & ast, bool force_restore)
+{
+    assert(name.database == database_name);
+    const auto & create_query = ast->as<const ASTCreateQuery &>();

-    ThreadPool pool;
+    tryAttachTable(
+        local_context,
+        create_query,
+        *this,
+        name.database,
+        file_path,
+        force_restore);

-    /// We must attach dictionaries before attaching tables
-    /// because while we're attaching tables we may need to have some dictionaries attached
-    /// (for example, dictionaries can be used in the default expressions for some tables).
-    /// On the other hand we can attach any dictionary (even sourced from ClickHouse table)
-    /// without having any tables attached. It is so because attaching of a dictionary means
-    /// loading of its config only, it doesn't involve loading the dictionary itself.
-
-    /// Attach dictionaries.
-    for (const auto & name_with_query : file_names)
-    {
-        const auto & create_query = name_with_query.second->as<const ASTCreateQuery &>();
-
-        if (create_query.is_dictionary)
-        {
-            pool.scheduleOrThrowOnError([&]()
-            {
-                tryAttachTable(
-                    local_context,
-                    create_query,
-                    *this,
-                    database_name,
-                    getMetadataPath() + name_with_query.first,
-                    has_force_restore_data_flag);
-
-                /// Messages, so that it's not boring to wait for the server to load for a long time.
- logAboutProgress(log, ++tables_processed, total_tables, watch); - }); - } - } - - pool.wait(); - - /// Attach tables. - for (const auto & name_with_query : file_names) - { - const auto & create_query = name_with_query.second->as(); - - if (!create_query.is_dictionary) - { - pool.scheduleOrThrowOnError([&]() - { - tryAttachTable( - local_context, - create_query, - *this, - database_name, - getMetadataPath() + name_with_query.first, - has_force_restore_data_flag); - - /// Messages, so that it's not boring to wait for the server to load for a long time. - logAboutProgress(log, ++tables_processed, total_tables, watch); - }); - } - } - - pool.wait(); - - if (!skip_startup_tables) - { - /// After all tables was basically initialized, startup them. - startupTablesImpl(pool); - } + /// Messages, so that it's not boring to wait for the server to load for a long time. + //logAboutProgress(log, ++tables_processed, total_tables, watch); } void DatabaseOrdinary::startupTables() diff --git a/src/Databases/DatabaseOrdinary.h b/src/Databases/DatabaseOrdinary.h index 7832377ccae..08ed79ad0ec 100644 --- a/src/Databases/DatabaseOrdinary.h +++ b/src/Databases/DatabaseOrdinary.h @@ -22,6 +22,12 @@ public: void loadStoredObjects(ContextMutablePtr context, bool has_force_restore_data_flag, bool force_attach, bool skip_startup_tables) override; + bool supportsLoadingInTopologicalOrder() const override { return true; } + + void loadTablesMetadata(ContextPtr context, ParsedTablesMetadata & metadata) override; + + void loadTableFromMetadata(ContextMutablePtr local_context, const String & file_path, const QualifiedTableName & name, const ASTPtr & ast, bool force_restore) override; + void startupTables() override; void alterTable( diff --git a/src/Databases/IDatabase.h b/src/Databases/IDatabase.h index bd9605dca71..4c9350905c3 100644 --- a/src/Databases/IDatabase.h +++ b/src/Databases/IDatabase.h @@ -7,6 +7,8 @@ #include #include +#include //FIXME + #include #include #include @@ -25,6 +27,7 @@ struct StorageInMemoryMetadata; struct StorageID; class ASTCreateQuery; using DictionariesWithID = std::vector>; +struct ParsedTablesMetadata; namespace ErrorCodes { @@ -131,6 +134,18 @@ public: { } + virtual bool supportsLoadingInTopologicalOrder() const { return false; } + + virtual void loadTablesMetadata(ContextPtr /*local_context*/, ParsedTablesMetadata & /*metadata*/) + { + throw Exception(ErrorCodes::LOGICAL_ERROR, "Not implemented"); + } + + virtual void loadTableFromMetadata(ContextMutablePtr /*local_context*/, const String & /*file_path*/, const QualifiedTableName & /*name*/, const ASTPtr & /*ast*/, bool /*force_restore*/) + { + throw Exception(ErrorCodes::LOGICAL_ERROR, "Not implemented"); + } + virtual void startupTables() {} /// Check the existence of the table. 
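The TablesLoader introduced next resolves the dependencies gathered above level by level: tables whose dependencies are all satisfied form the current level and are loaded in parallel, finishing a level may unblock further tables, and anything still blocked when no level remains is reported as a cycle. Stripped of ClickHouse types, the removal loop reduces to the following sketch (the table names mirror the new 01160_table_dependencies test; the containers are illustrative, not the loader's actual types):

```cpp
#include <iostream>
#include <map>
#include <set>
#include <string>

int main()
{
    /// Who depends on whom; dict_src starts out independent.
    std::map<std::string, std::set<std::string>> deps = {
        {"dict1", {"dict_src"}},        /// dictionary sourced from a table
        {"join",  {"dict1"}},           /// Join table with a dictGet() default
        {"t",     {"join", "dict1"}},   /// table using joinGet() and dictGet()
    };
    std::set<std::string> independent = {"dict_src"};

    while (!independent.empty())
    {
        std::cout << "level:";
        for (const auto & name : independent)
            std::cout << ' ' << name;   /// this whole level can load in parallel
        std::cout << '\n';

        std::set<std::string> next;
        for (auto it = deps.begin(); it != deps.end();)
        {
            for (const auto & name : independent)
                it->second.erase(name); /// dependency satisfied by this level
            if (it->second.empty())
            {
                next.insert(it->first);
                it = deps.erase(it);
            }
            else
                ++it;
        }
        independent = std::move(next);
    }

    if (!deps.empty())
        std::cout << "cyclic dependencies detected\n";  /// cf. checkCyclicDependencies()
}
```

Run on this input it prints four levels (dict_src, dict1, join, t), which is exactly the attach order the real loader needs so that `dictGet`/`joinGet` defaults can be resolved while tables are being attached.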
diff --git a/src/Databases/TablesLoader.cpp b/src/Databases/TablesLoader.cpp
new file mode 100644
index 00000000000..e1d79a0b826
--- /dev/null
+++ b/src/Databases/TablesLoader.cpp
@@ -0,0 +1,153 @@
+#include <Databases/TablesLoader.h>
+
+namespace DB
+{
+
+static constexpr size_t PRINT_MESSAGE_EACH_N_OBJECTS = 256;
+static constexpr size_t PRINT_MESSAGE_EACH_N_SECONDS = 5;
+
+
+void logAboutProgress(Poco::Logger * log, size_t processed, size_t total, AtomicStopwatch & watch)
+{
+    if (processed % PRINT_MESSAGE_EACH_N_OBJECTS == 0 || watch.compareAndRestart(PRINT_MESSAGE_EACH_N_SECONDS))
+    {
+        LOG_INFO(log, "{}%", processed * 100.0 / total);
+        watch.restart();
+    }
+}
+
+TablesLoader::TablesLoader(ContextMutablePtr global_context_, Databases databases_, bool force_restore_, bool force_attach_)
+: global_context(global_context_)
+, databases(std::move(databases_))
+, force_restore(force_restore_)
+, force_attach(force_attach_)
+{
+    all_tables.default_database = global_context->getCurrentDatabase();
+    log = &Poco::Logger::get("TablesLoader");
+}
+
+
+void TablesLoader::loadTables()
+{
+    for (auto & database : databases)
+    {
+        if (database->supportsLoadingInTopologicalOrder())
+            databases_to_load.emplace(database->getDatabaseName(), database);
+        else
+            database->loadStoredObjects(global_context, force_restore, force_attach, true);
+    }
+
+    for (auto & database : databases_to_load)
+        database.second->loadTablesMetadata(global_context, all_tables);
+
+    auto table_does_not_exist = [&](const QualifiedTableName & table_name, const QualifiedTableName & dependency_name)
+    {
+        if (all_tables.metadata.contains(dependency_name))
+            return false;
+        if (DatabaseCatalog::instance().isTableExist(StorageID(dependency_name.database, dependency_name.table), global_context))
+            return false;
+        /// FIXME if XML dict
+
+        LOG_WARNING(log, "Table {} depends on {}, but seems like the second one does not exist", table_name, dependency_name);
+        return true;
+    };
+
+    removeDependencies(table_does_not_exist, all_tables.independent_tables);
+
+    //LOG_TRACE(log, "Independent database objects: {}", fmt::join(all_tables.independent_tables, ", "));
+    //for (const auto & dependencies : all_tables.table_dependencies)
+    //    LOG_TRACE(log, "Database object {} depends on: {}", dependencies.first, fmt::join(dependencies.second, ", "));
+
+    auto is_dependency_loaded = [&](const QualifiedTableName & /*table_name*/, const QualifiedTableName & dependency_name)
+    {
+        return all_tables.independent_tables.contains(dependency_name);
+    };
+
+    AtomicStopwatch watch;
+    ThreadPool pool;
+    size_t level = 0;
+    do
+    {
+        assert(all_tables.metadata.size() == tables_processed + all_tables.independent_tables.size() + all_tables.table_dependencies.size());
+        startLoadingIndependentTables(pool, watch, level);
+        std::unordered_set<QualifiedTableName> new_independent_tables;
+        removeDependencies(is_dependency_loaded, new_independent_tables);
+        pool.wait();
+        all_tables.independent_tables = std::move(new_independent_tables);
+        checkCyclicDependencies();
+        ++level;
+        assert(all_tables.metadata.size() == tables_processed + all_tables.independent_tables.size() + all_tables.table_dependencies.size());
+    } while (!all_tables.independent_tables.empty());
+
+    for (auto & database : databases_to_load)
+    {
+        database.second->startupTables();
+    }
+}
+
+void TablesLoader::removeDependencies(RemoveDependencyPredicate need_remove_dependency, std::unordered_set<QualifiedTableName> & independent_tables)
+{
+    auto table_it = all_tables.table_dependencies.begin();
+    while (table_it != all_tables.table_dependencies.end())
+    {
+        auto & dependencies = table_it->second;
+        assert(!dependencies.empty());
+        auto dependency_it = dependencies.begin();
+        while (dependency_it != dependencies.end())
+        {
+            if (need_remove_dependency(table_it->first, *dependency_it))
+                dependency_it = dependencies.erase(dependency_it);
+            else
+                ++dependency_it;
+        }
+
+        if (dependencies.empty())
+        {
+            independent_tables.emplace(std::move(table_it->first));
+            table_it = all_tables.table_dependencies.erase(table_it);
+        }
+        else
+        {
+            ++table_it;
+        }
+    }
+}
+
+void TablesLoader::startLoadingIndependentTables(ThreadPool & pool, AtomicStopwatch & watch, size_t level)
+{
+    size_t total_tables = all_tables.metadata.size();
+
+    LOG_INFO(log, "Loading {} tables with {} dependency level", all_tables.independent_tables.size(), level);
+
+    for (const auto & table_name : all_tables.independent_tables)
+    {
+        pool.scheduleOrThrowOnError([&]()
+        {
+            const auto & path_and_query = all_tables.metadata[table_name];
+            const auto & path = path_and_query.first;
+            const auto & ast = path_and_query.second;
+            databases_to_load[table_name.database]->loadTableFromMetadata(global_context, path, table_name, ast, force_restore);
+            logAboutProgress(log, ++tables_processed, total_tables, watch);
+        });
+    }
+}
+
+void TablesLoader::checkCyclicDependencies() const
+{
+    if (!all_tables.independent_tables.empty())
+        return;
+    if (all_tables.table_dependencies.empty())
+        return;
+
+    for (const auto & dependencies : all_tables.table_dependencies)
+    {
+        LOG_WARNING(log, "Cannot resolve dependencies: Table {} depends on {}",
+                    dependencies.first, fmt::join(dependencies.second, ", "));
+    }
+
+    throw Exception(ErrorCodes::INFINITE_LOOP, "Cannot attach {} tables due to cyclic dependencies. "
+                    "See server log for details.", all_tables.table_dependencies.size());
+}
+
+}
+
diff --git a/src/Databases/TablesLoader.h b/src/Databases/TablesLoader.h
new file mode 100644
index 00000000000..f46929ec179
--- /dev/null
+++ b/src/Databases/TablesLoader.h
@@ -0,0 +1,63 @@
+#pragma once
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+    extern const int INFINITE_LOOP;
+}
+
+void logAboutProgress(Poco::Logger * log, size_t processed, size_t total, AtomicStopwatch & watch);
+
+struct ParsedTablesMetadata
+{
+    String default_database;
+
+    using ParsedMetadata = std::map<QualifiedTableName, std::pair<String, ASTPtr>>;
+    std::mutex mutex;
+    ParsedMetadata metadata;
+    size_t total_dictionaries = 0;
+    std::unordered_set<QualifiedTableName> independent_tables;
+    std::unordered_map<QualifiedTableName, std::vector<QualifiedTableName>> table_dependencies;
+};
+
+class TablesLoader
+{
+public:
+
+    using Databases = std::vector<DatabasePtr>;
+
+    TablesLoader(ContextMutablePtr global_context_, Databases databases_, bool force_restore_ = false, bool force_attach_ = false);
+
+    void loadTables();
+
+private:
+    ContextMutablePtr global_context;
+    Databases databases;
+    bool force_restore;
+    bool force_attach;
+
+    std::map<String, DatabasePtr> databases_to_load;
+    ParsedTablesMetadata all_tables;
+    Poco::Logger * log;
+    std::atomic<size_t> tables_processed{0};
+
+
+    using RemoveDependencyPredicate = std::function<bool(const QualifiedTableName &, const QualifiedTableName &)>;
+    void removeDependencies(RemoveDependencyPredicate need_remove_dependency, std::unordered_set<QualifiedTableName> & independent_tables);
+
+    void startLoadingIndependentTables(ThreadPool & pool, AtomicStopwatch & watch, size_t level);
+
+    void checkCyclicDependencies() const;
+
+};
+
+}
diff --git a/src/Interpreters/DatabaseCatalog.cpp b/src/Interpreters/DatabaseCatalog.cpp
index 6e0ca97df1d..99ab3cabd31 100644
--- a/src/Interpreters/DatabaseCatalog.cpp
+++ b/src/Interpreters/DatabaseCatalog.cpp
@@ -157,15 +157,6 @@ void
DatabaseCatalog::loadDatabases() /// Another background thread which drops temporary LiveViews. /// We should start it after loadMarkedAsDroppedTables() to avoid race condition. TemporaryLiveViewCleaner::instance().startup(); - - /// Start up tables after all databases are loaded. - for (const auto & [database_name, database] : databases) - { - if (database_name == DatabaseCatalog::TEMPORARY_DATABASE) - continue; - - database->startupTables(); - } } void DatabaseCatalog::shutdownImpl() diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 7e061662534..5bddcb9fe1d 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -53,6 +53,7 @@ #include #include #include +#include #include @@ -271,9 +272,12 @@ BlockIO InterpreterCreateQuery::createDatabase(ASTCreateQuery & create) renamed = true; } - /// We use global context here, because storages lifetime is bigger than query context lifetime - database->loadStoredObjects( - getContext()->getGlobalContext(), has_force_restore_data_flag, create.attach && force_attach, skip_startup_tables); //-V560 + if (!load_database_without_tables) + { + /// We use global context here, because storages lifetime is bigger than query context lifetime + TablesLoader loader{getContext()->getGlobalContext(), {database}, has_force_restore_data_flag, create.attach && force_attach}; + loader.loadTables(); + } } catch (...) { diff --git a/src/Interpreters/InterpreterCreateQuery.h b/src/Interpreters/InterpreterCreateQuery.h index 1ef5e0470fc..47d0e2f492d 100644 --- a/src/Interpreters/InterpreterCreateQuery.h +++ b/src/Interpreters/InterpreterCreateQuery.h @@ -57,6 +57,11 @@ public: skip_startup_tables = skip_startup_tables_; } + void setLoadDatabaseWithoutTables(bool load_database_without_tables_) + { + load_database_without_tables = load_database_without_tables_; + } + /// Obtain information about columns, their types, default values and column comments, /// for case when columns in CREATE query is specified explicitly. 
static ColumnsDescription getColumnsDescription(const ASTExpressionList & columns, ContextPtr context, bool attach); @@ -100,6 +105,7 @@ private: bool internal = false; bool force_attach = false; bool skip_startup_tables = false; + bool load_database_without_tables = false; mutable String as_database_saved; mutable String as_table_saved; diff --git a/src/Interpreters/loadMetadata.cpp b/src/Interpreters/loadMetadata.cpp index 458e17ac16b..e4c73e7d4e5 100644 --- a/src/Interpreters/loadMetadata.cpp +++ b/src/Interpreters/loadMetadata.cpp @@ -11,6 +11,7 @@ #include #include +#include #include #include @@ -44,6 +45,7 @@ static void executeCreateQuery( interpreter.setForceAttach(true); interpreter.setForceRestoreData(has_force_restore_data_flag); interpreter.setSkipStartupTables(true); + interpreter.setLoadDatabaseWithoutTables(database != DatabaseCatalog::SYSTEM_DATABASE); interpreter.execute(); } @@ -155,8 +157,15 @@ void loadMetadata(ContextMutablePtr context, const String & default_database_nam if (create_default_db_if_not_exists && !metadata_dir_for_default_db_already_exists) databases.emplace(default_database_name, path + "/" + escapeForFileName(default_database_name)); + TablesLoader::Databases loaded_databases; for (const auto & [name, db_path] : databases) + { loadDatabase(context, name, db_path, has_force_restore_data_flag); + loaded_databases.emplace_back(DatabaseCatalog::instance().getDatabase(name)); + } + + TablesLoader loader{context, std::move(loaded_databases), has_force_restore_data_flag, /* force_attach */ true}; + loader.loadTables(); if (has_force_restore_data_flag) { diff --git a/tests/queries/0_stateless/01160_table_dependencies.reference b/tests/queries/0_stateless/01160_table_dependencies.reference new file mode 100644 index 00000000000..6691df07cb9 --- /dev/null +++ b/tests/queries/0_stateless/01160_table_dependencies.reference @@ -0,0 +1,5 @@ +dict1 +dict2 +dict_src +join +t diff --git a/tests/queries/0_stateless/01160_table_dependencies.sh b/tests/queries/0_stateless/01160_table_dependencies.sh new file mode 100755 index 00000000000..ecd941a09b1 --- /dev/null +++ b/tests/queries/0_stateless/01160_table_dependencies.sh @@ -0,0 +1,43 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + + +$CLICKHOUSE_CLIENT -q "drop table if exists dict_src;" +$CLICKHOUSE_CLIENT -q "drop dictionary if exists dict1;" +$CLICKHOUSE_CLIENT -q "drop dictionary if exists dict2;" +$CLICKHOUSE_CLIENT -q "drop table if exists join;" +$CLICKHOUSE_CLIENT -q "drop table if exists t;" + +$CLICKHOUSE_CLIENT -q "create table dict_src (n int, m int, s String) engine=MergeTree order by n;" + +$CLICKHOUSE_CLIENT -q "create dictionary dict1 (n int default 0, m int default 1, s String default 'qqq') +PRIMARY KEY n +SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'dict_src' PASSWORD '' DB '$CLICKHOUSE_DATABASE')) +LIFETIME(MIN 1 MAX 10) LAYOUT(FLAT());" + +$CLICKHOUSE_CLIENT -q "create table join(n int, m int default dictGet('$CLICKHOUSE_DATABASE.dict1', 'm', 42::UInt64)) engine=Join(any, left, n);" + +$CLICKHOUSE_CLIENT -q "create dictionary dict2 (n int default 0, m int DEFAULT 2, s String default 'asd') +PRIMARY KEY n +SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'join' PASSWORD '' DB '$CLICKHOUSE_DATABASE')) +LIFETIME(MIN 1 MAX 10) LAYOUT(FLAT());" + +$CLICKHOUSE_CLIENT -q "create table t (n int, m int default joinGet($CLICKHOUSE_DATABASE.join, 'm', 42::int), +s String default dictGet($CLICKHOUSE_DATABASE.dict1, 's', 42::UInt64)) engine=MergeTree order by n;" + +CLICKHOUSE_CLIENT_DEFAULT_DB=$(echo ${CLICKHOUSE_CLIENT} | sed 's/'"--database=${CLICKHOUSE_DATABASE}"'/--database=default/g') + +for i in {1..10}; do + $CLICKHOUSE_CLIENT_DEFAULT_DB -q "detach database $CLICKHOUSE_DATABASE;" + $CLICKHOUSE_CLIENT_DEFAULT_DB -q "attach database $CLICKHOUSE_DATABASE;" +done +$CLICKHOUSE_CLIENT -q "show tables from $CLICKHOUSE_DATABASE;" + +$CLICKHOUSE_CLIENT -q "drop table dict_src;" +$CLICKHOUSE_CLIENT -q "drop dictionary dict1;" +$CLICKHOUSE_CLIENT -q "drop dictionary dict2;" +$CLICKHOUSE_CLIENT -q "drop table join;" +$CLICKHOUSE_CLIENT -q "drop table t;" From 8456fdd7583f70e848770132940df90b6f595496 Mon Sep 17 00:00:00 2001 From: hexiaoting Date: Tue, 31 Aug 2021 11:35:25 +0800 Subject: [PATCH 15/80] Fix compiler error --- src/Interpreters/TreeOptimizer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/TreeOptimizer.cpp b/src/Interpreters/TreeOptimizer.cpp index 98c50e93c08..fb220cddc02 100644 --- a/src/Interpreters/TreeOptimizer.cpp +++ b/src/Interpreters/TreeOptimizer.cpp @@ -593,7 +593,7 @@ void optimizeFunctionsToSubcolumns(ASTPtr & query, const StorageMetadataPtr & me /// rewrite to : SELECT quantiles(0.5, 0.9, 0.95)(x)[1], quantiles(0.5, 0.9, 0.95)(x)[2], quantiles(0.5, 0.9, 0.95)(x)[3] FROM ... 
void fuseCandidate(std::unordered_map & fuse_quantile) { - for (auto candidate : fuse_quantile) + for (const auto & candidate : fuse_quantile) { String func_name = candidate.first; GatherFunctionQuantileData::FuseQuantileAggregatesData args_to_functions = candidate.second; From c8d8f0a38c7748131e07038570ffc793b93f63eb Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Tue, 31 Aug 2021 11:53:48 +0300 Subject: [PATCH 16/80] fix --- src/Databases/DatabaseAtomic.cpp | 59 +++++++++++-------- src/Databases/DatabaseAtomic.h | 4 ++ src/Databases/DatabaseOrdinary.cpp | 15 +---- src/Databases/DatabaseOrdinary.h | 4 +- src/Databases/DatabaseReplicated.cpp | 12 +++- src/Databases/DatabaseReplicated.h | 5 ++ src/Databases/IDatabase.h | 11 +++- .../MySQL/DatabaseMaterializedMySQL.cpp | 6 +- .../MySQL/DatabaseMaterializedMySQL.h | 2 +- .../DatabaseMaterializedPostgreSQL.cpp | 6 +- .../DatabaseMaterializedPostgreSQL.h | 2 +- src/Databases/TablesLoader.cpp | 37 ++++++++---- src/Databases/TablesLoader.h | 25 +++++--- src/Interpreters/InterpreterCreateQuery.cpp | 2 +- .../0_stateless/01160_table_dependencies.sh | 2 +- 15 files changed, 118 insertions(+), 74 deletions(-) diff --git a/src/Databases/DatabaseAtomic.cpp b/src/Databases/DatabaseAtomic.cpp index 2dbcd652004..83763ccd856 100644 --- a/src/Databases/DatabaseAtomic.cpp +++ b/src/Databases/DatabaseAtomic.cpp @@ -416,38 +416,47 @@ UUID DatabaseAtomic::tryGetTableUUID(const String & table_name) const return UUIDHelpers::Nil; } +void DatabaseAtomic::beforeLoadingMetadata(ContextMutablePtr /*context*/, bool has_force_restore_data_flag, bool /*force_attach*/) +{ + if (!has_force_restore_data_flag) + return; + + /// Recreate symlinks to table data dirs in case of force restore, because some of them may be broken + for (const auto & table_path : fs::directory_iterator(path_to_table_symlinks)) + { + if (!fs::is_symlink(table_path)) + { + throw Exception(ErrorCodes::ABORTED, + "'{}' is not a symlink. Atomic database should contains only symlinks.", std::string(table_path.path())); + } + + fs::remove(table_path); + } +} + void DatabaseAtomic::loadStoredObjects( ContextMutablePtr local_context, bool has_force_restore_data_flag, bool force_attach, bool skip_startup_tables) { - /// Recreate symlinks to table data dirs in case of force restore, because some of them may be broken - if (has_force_restore_data_flag) - { - for (const auto & table_path : fs::directory_iterator(path_to_table_symlinks)) - { - if (!fs::is_symlink(table_path)) - { - throw Exception(ErrorCodes::ABORTED, - "'{}' is not a symlink. 
Atomic database should contains only symlinks.", std::string(table_path.path())); - } - - fs::remove(table_path); - } - } - + beforeLoadingMetadata(local_context, has_force_restore_data_flag, force_attach); DatabaseOrdinary::loadStoredObjects(local_context, has_force_restore_data_flag, force_attach, skip_startup_tables); +} - if (has_force_restore_data_flag) +void DatabaseAtomic::startupTables(ThreadPool & thread_pool, bool force_restore, bool force_attach) +{ + DatabaseOrdinary::startupTables(thread_pool, force_restore, force_attach); + + if (!force_restore) + return; + + NameToPathMap table_names; { - NameToPathMap table_names; - { - std::lock_guard lock{mutex}; - table_names = table_name_to_path; - } - - fs::create_directories(path_to_table_symlinks); - for (const auto & table : table_names) - tryCreateSymlink(table.first, table.second, true); + std::lock_guard lock{mutex}; + table_names = table_name_to_path; } + + fs::create_directories(path_to_table_symlinks); + for (const auto & table : table_names) + tryCreateSymlink(table.first, table.second, true); } void DatabaseAtomic::tryCreateSymlink(const String & table_name, const String & actual_data_path, bool if_data_path_exist) diff --git a/src/Databases/DatabaseAtomic.h b/src/Databases/DatabaseAtomic.h index 8be009cd6ca..db9cef4dbc6 100644 --- a/src/Databases/DatabaseAtomic.h +++ b/src/Databases/DatabaseAtomic.h @@ -49,6 +49,10 @@ public: void loadStoredObjects(ContextMutablePtr context, bool has_force_restore_data_flag, bool force_attach, bool skip_startup_tables) override; + void beforeLoadingMetadata(ContextMutablePtr context, bool has_force_restore_data_flag, bool force_attach) override; + + void startupTables(ThreadPool & thread_pool, bool force_restore, bool force_attach) override; + /// Atomic database cannot be detached if there is detached table which still in use void assertCanBeDetached(bool cleanup) override; diff --git a/src/Databases/DatabaseOrdinary.cpp b/src/Databases/DatabaseOrdinary.cpp index f82db868ac8..567bf8726e3 100644 --- a/src/Databases/DatabaseOrdinary.cpp +++ b/src/Databases/DatabaseOrdinary.cpp @@ -75,7 +75,7 @@ DatabaseOrdinary::DatabaseOrdinary( } void DatabaseOrdinary::loadStoredObjects( - ContextMutablePtr local_context, bool has_force_restore_data_flag, bool /*force_attach*/, bool skip_startup_tables) + ContextMutablePtr local_context, bool has_force_restore_data_flag, bool force_attach, bool skip_startup_tables) { /** Tables load faster if they are loaded in sorted (by name) order. * Otherwise (for the ext4 filesystem), `DirectoryIterator` iterates through them in some order, @@ -147,7 +147,7 @@ void DatabaseOrdinary::loadStoredObjects( if (!skip_startup_tables) { /// After all tables was basically initialized, startup them. - startupTablesImpl(pool); + startupTables(pool, has_force_restore_data_flag, force_attach); } } @@ -224,18 +224,9 @@ void DatabaseOrdinary::loadTableFromMetadata(ContextMutablePtr local_context, co name.database, file_path, force_restore); - - /// Messages, so that it's not boring to wait for the server to load for a long time. 
- //logAboutProgress(log, ++tables_processed, total_tables, watch); } -void DatabaseOrdinary::startupTables() -{ - ThreadPool pool; - startupTablesImpl(pool); -} - -void DatabaseOrdinary::startupTablesImpl(ThreadPool & thread_pool) +void DatabaseOrdinary::startupTables(ThreadPool & thread_pool, bool /*force_restore*/, bool /*force_attach*/) { LOG_INFO(log, "Starting up tables."); diff --git a/src/Databases/DatabaseOrdinary.h b/src/Databases/DatabaseOrdinary.h index 08ed79ad0ec..3f300bfb3eb 100644 --- a/src/Databases/DatabaseOrdinary.h +++ b/src/Databases/DatabaseOrdinary.h @@ -28,7 +28,7 @@ public: void loadTableFromMetadata(ContextMutablePtr local_context, const String & file_path, const QualifiedTableName & name, const ASTPtr & ast, bool force_restore) override; - void startupTables() override; + void startupTables(ThreadPool & thread_pool, bool force_restore, bool force_attach) override; void alterTable( ContextPtr context, @@ -42,8 +42,6 @@ protected: const String & table_metadata_path, const String & statement, ContextPtr query_context); - - void startupTablesImpl(ThreadPool & thread_pool); }; } diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp index da03eb6aba6..9aebc701aa9 100644 --- a/src/Databases/DatabaseReplicated.cpp +++ b/src/Databases/DatabaseReplicated.cpp @@ -305,13 +305,21 @@ void DatabaseReplicated::createReplicaNodesInZooKeeper(const zkutil::ZooKeeperPt createEmptyLogEntry(current_zookeeper); } +void DatabaseReplicated::beforeLoadingMetadata(ContextMutablePtr /*context*/, bool /*has_force_restore_data_flag*/, bool force_attach) +{ + tryConnectToZooKeeperAndInitDatabase(force_attach); +} + void DatabaseReplicated::loadStoredObjects( ContextMutablePtr local_context, bool has_force_restore_data_flag, bool force_attach, bool skip_startup_tables) { - tryConnectToZooKeeperAndInitDatabase(force_attach); - + beforeLoadingMetadata(local_context, has_force_restore_data_flag, force_attach); DatabaseAtomic::loadStoredObjects(local_context, has_force_restore_data_flag, force_attach, skip_startup_tables); +} +void DatabaseReplicated::startupTables(ThreadPool & thread_pool, bool force_restore, bool force_attach) +{ + DatabaseAtomic::startupTables(thread_pool, force_restore, force_attach); ddl_worker = std::make_unique(this, getContext()); ddl_worker->startup(); } diff --git a/src/Databases/DatabaseReplicated.h b/src/Databases/DatabaseReplicated.h index 1e0daeed07e..daba7dad17b 100644 --- a/src/Databases/DatabaseReplicated.h +++ b/src/Databases/DatabaseReplicated.h @@ -58,6 +58,11 @@ public: void drop(ContextPtr /*context*/) override; void loadStoredObjects(ContextMutablePtr context, bool has_force_restore_data_flag, bool force_attach, bool skip_startup_tables) override; + + void beforeLoadingMetadata(ContextMutablePtr context, bool has_force_restore_data_flag, bool force_attach) override; + + void startupTables(ThreadPool & thread_pool, bool force_restore, bool force_attach) override; + void shutdown() override; friend struct DatabaseReplicatedTask; diff --git a/src/Databases/IDatabase.h b/src/Databases/IDatabase.h index 4c9350905c3..dc8c24e0bcc 100644 --- a/src/Databases/IDatabase.h +++ b/src/Databases/IDatabase.h @@ -5,6 +5,7 @@ #include #include #include +#include #include #include //FIXME @@ -33,6 +34,7 @@ namespace ErrorCodes { extern const int NOT_IMPLEMENTED; extern const int CANNOT_GET_CREATE_TABLE_QUERY; + extern const int LOGICAL_ERROR; } class IDatabaseTablesIterator @@ -136,6 +138,13 @@ public: virtual bool 
supportsLoadingInTopologicalOrder() const { return false; } + virtual void beforeLoadingMetadata( + ContextMutablePtr /*context*/, + bool /*has_force_restore_data_flag*/, + bool /*force_attach*/) + { + } + virtual void loadTablesMetadata(ContextPtr /*local_context*/, ParsedTablesMetadata & /*metadata*/) { throw Exception(ErrorCodes::LOGICAL_ERROR, "Not implemented"); @@ -146,7 +155,7 @@ public: throw Exception(ErrorCodes::LOGICAL_ERROR, "Not implemented"); } - virtual void startupTables() {} + virtual void startupTables(ThreadPool & /*thread_pool*/, bool /*force_restore*/, bool /*force_attach*/) {} /// Check the existence of the table. virtual bool isTableExist(const String & name, ContextPtr context) const = 0; diff --git a/src/Databases/MySQL/DatabaseMaterializedMySQL.cpp b/src/Databases/MySQL/DatabaseMaterializedMySQL.cpp index 0d81a4e1a98..87ec461026e 100644 --- a/src/Databases/MySQL/DatabaseMaterializedMySQL.cpp +++ b/src/Databases/MySQL/DatabaseMaterializedMySQL.cpp @@ -94,10 +94,10 @@ void DatabaseMaterializedMySQL::setException(const std::exception_ptr & ex } template -void DatabaseMaterializedMySQL::loadStoredObjects( - ContextMutablePtr context_, bool has_force_restore_data_flag, bool force_attach, bool skip_startup_tables) +void DatabaseMaterializedMySQL::startupTables(ThreadPool & thread_pool, bool force_restore, bool force_attach) { - Base::loadStoredObjects(context_, has_force_restore_data_flag, force_attach, skip_startup_tables); + Base::startupTables(thread_pool, force_attach, force_restore); + if (!force_attach) materialize_thread.assertMySQLAvailable(); diff --git a/src/Databases/MySQL/DatabaseMaterializedMySQL.h b/src/Databases/MySQL/DatabaseMaterializedMySQL.h index 292edc97878..ac32607a22c 100644 --- a/src/Databases/MySQL/DatabaseMaterializedMySQL.h +++ b/src/Databases/MySQL/DatabaseMaterializedMySQL.h @@ -43,7 +43,7 @@ protected: public: String getEngineName() const override { return "MaterializedMySQL"; } - void loadStoredObjects(ContextMutablePtr context_, bool has_force_restore_data_flag, bool force_attach, bool skip_startup_tables) override; + void startupTables(ThreadPool & thread_pool, bool force_restore, bool force_attach) override; void createTable(ContextPtr context_, const String & name, const StoragePtr & table, const ASTPtr & query) override; diff --git a/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.cpp b/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.cpp index c9ea8d12ef2..3e0d8e1d300 100644 --- a/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.cpp +++ b/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.cpp @@ -109,11 +109,9 @@ void DatabaseMaterializedPostgreSQL::startSynchronization() } -void DatabaseMaterializedPostgreSQL::loadStoredObjects( - ContextMutablePtr local_context, bool has_force_restore_data_flag, bool force_attach, bool skip_startup_tables) +void DatabaseMaterializedPostgreSQL::startupTables(ThreadPool & thread_pool, bool force_restore, bool force_attach) { - DatabaseAtomic::loadStoredObjects(local_context, has_force_restore_data_flag, force_attach, skip_startup_tables); - + DatabaseAtomic::startupTables(thread_pool, force_restore, force_attach); try { startSynchronization(); diff --git a/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.h b/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.h index 915bf44f1f2..c5b3c9fcede 100644 --- a/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.h +++ b/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.h @@ -43,7 +43,7 @@ public: String 
getMetadataPath() const override { return metadata_path; } - void loadStoredObjects(ContextMutablePtr, bool, bool force_attach, bool skip_startup_tables) override; + void startupTables(ThreadPool & thread_pool, bool force_restore, bool force_attach) override; DatabaseTablesIteratorPtr getTablesIterator(ContextPtr context, const DatabaseOnDisk::FilterByNameFunction & filter_by_table_name) const override; diff --git a/src/Databases/TablesLoader.cpp b/src/Databases/TablesLoader.cpp index e1d79a0b826..ded359790cb 100644 --- a/src/Databases/TablesLoader.cpp +++ b/src/Databases/TablesLoader.cpp @@ -1,8 +1,20 @@ #include +#include +#include +#include +#include +#include +#include +#include namespace DB { +namespace ErrorCodes +{ + extern const int INFINITE_LOOP; +} + static constexpr size_t PRINT_MESSAGE_EACH_N_OBJECTS = 256; static constexpr size_t PRINT_MESSAGE_EACH_N_SECONDS = 5; @@ -29,19 +41,23 @@ TablesLoader::TablesLoader(ContextMutablePtr global_context_, Databases database void TablesLoader::loadTables() { + bool need_resolve_dependencies = !global_context->getConfigRef().has("ignore_table_dependencies_on_metadata_loading"); for (auto & database : databases) { - if (database->supportsLoadingInTopologicalOrder()) + if (need_resolve_dependencies && database->supportsLoadingInTopologicalOrder()) databases_to_load.emplace(database->getDatabaseName(), database); else database->loadStoredObjects(global_context, force_restore, force_attach, true); } for (auto & database : databases_to_load) + { + database.second->beforeLoadingMetadata(global_context, force_restore, force_attach); database.second->loadTablesMetadata(global_context, all_tables); + } - auto table_does_not_exist = [&](const QualifiedTableName & table_name, const QualifiedTableName & dependency_name) - { + auto table_does_not_exist = [this](const QualifiedTableName & table_name, const QualifiedTableName & dependency_name) + { if (all_tables.metadata.contains(dependency_name)) return false; if (DatabaseCatalog::instance().isTableExist(StorageID(dependency_name.database, dependency_name.table), global_context)) @@ -50,7 +66,7 @@ void TablesLoader::loadTables() LOG_WARNING(log, "Table {} depends on {}, but seems like the second one does not exist", table_name, dependency_name); return true; - }; + }; removeDependencies(table_does_not_exist, all_tables.independent_tables); @@ -58,10 +74,10 @@ void TablesLoader::loadTables() //for (const auto & dependencies : all_tables.table_dependencies) // LOG_TRACE(log, "Database object {} depends on: {}", dependencies.first, fmt::join(dependencies.second, ", ")); - auto is_dependency_loaded = [&](const QualifiedTableName & /*table_name*/, const QualifiedTableName & dependency_name) - { + auto is_dependency_loaded = [this](const QualifiedTableName & /*table_name*/, const QualifiedTableName & dependency_name) + { return all_tables.independent_tables.contains(dependency_name); - }; + }; AtomicStopwatch watch; ThreadPool pool; @@ -81,7 +97,7 @@ void TablesLoader::loadTables() for (auto & database : databases_to_load) { - database.second->startupTables(); + database.second->startupTables(pool, force_restore, force_attach); } } @@ -121,7 +137,7 @@ void TablesLoader::startLoadingIndependentTables(ThreadPool & pool, AtomicStopwa for (const auto & table_name : all_tables.independent_tables) { - pool.scheduleOrThrowOnError([&]() + pool.scheduleOrThrowOnError([this, total_tables, &table_name, &watch]() { const auto & path_and_query = all_tables.metadata[table_name]; const auto & path = path_and_query.first; @@ 
-141,8 +157,7 @@ void TablesLoader::checkCyclicDependencies() const for (const auto & dependencies : all_tables.table_dependencies) { - LOG_WARNING(log, "Cannot resolve dependencies: Table {} depends on {}", - dependencies.first, fmt::join(dependencies.second, ", ")); + LOG_WARNING(log, "Cannot resolve dependencies: Table {} depends on {}", dependencies.first, fmt::join(dependencies.second, ", ")); } throw Exception(ErrorCodes::INFINITE_LOOP, "Cannot attach {} tables due to cyclic dependencies. " diff --git a/src/Databases/TablesLoader.h b/src/Databases/TablesLoader.h index f46929ec179..fface310bb6 100644 --- a/src/Databases/TablesLoader.h +++ b/src/Databases/TablesLoader.h @@ -1,19 +1,26 @@ #pragma once -#include -#include -#include +#include #include -#include -#include +#include +#include #include +#include +#include +#include +#include + +namespace Poco +{ + class Logger; +} + +class AtomicStopwatch; namespace DB { -namespace ErrorCodes -{ - extern const int INFINITE_LOOP; -} +class IDatabase; +using DatabasePtr = std::shared_ptr; void logAboutProgress(Poco::Logger * log, size_t processed, size_t total, AtomicStopwatch & watch); diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 5bddcb9fe1d..d885b9a2ac5 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -275,7 +275,7 @@ BlockIO InterpreterCreateQuery::createDatabase(ASTCreateQuery & create) if (!load_database_without_tables) { /// We use global context here, because storages lifetime is bigger than query context lifetime - TablesLoader loader{getContext()->getGlobalContext(), {database}, has_force_restore_data_flag, create.attach && force_attach}; + TablesLoader loader{getContext()->getGlobalContext(), {database}, has_force_restore_data_flag, create.attach && force_attach}; //-V560 loader.loadTables(); } } diff --git a/tests/queries/0_stateless/01160_table_dependencies.sh b/tests/queries/0_stateless/01160_table_dependencies.sh index ecd941a09b1..0ea213ba5ff 100755 --- a/tests/queries/0_stateless/01160_table_dependencies.sh +++ b/tests/queries/0_stateless/01160_table_dependencies.sh @@ -30,7 +30,7 @@ s String default dictGet($CLICKHOUSE_DATABASE.dict1, 's', 42::UInt64)) engine=Me CLICKHOUSE_CLIENT_DEFAULT_DB=$(echo ${CLICKHOUSE_CLIENT} | sed 's/'"--database=${CLICKHOUSE_DATABASE}"'/--database=default/g') -for i in {1..10}; do +for _ in {1..10}; do $CLICKHOUSE_CLIENT_DEFAULT_DB -q "detach database $CLICKHOUSE_DATABASE;" $CLICKHOUSE_CLIENT_DEFAULT_DB -q "attach database $CLICKHOUSE_DATABASE;" done From 024a24aaf781598fe5b42abde8ff244b5f71c696 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Wed, 1 Sep 2021 22:42:49 +0300 Subject: [PATCH 17/80] better code, moar logging --- programs/local/LocalServer.cpp | 1 + programs/server/Server.cpp | 1 + src/Databases/DDLDependencyVisitor.cpp | 2 +- src/Databases/DDLDependencyVisitor.h | 4 +- src/Databases/DatabaseOnDisk.cpp | 2 +- src/Databases/DatabaseOrdinary.cpp | 12 +- .../MySQL/DatabaseMaterializedMySQL.cpp | 2 +- src/Databases/TablesLoader.cpp | 203 ++++++++++++------ src/Databases/TablesLoader.h | 56 ++++- .../ExternalDictionariesLoader.cpp | 4 + src/Interpreters/ExternalDictionariesLoader.h | 2 + src/Interpreters/ExternalLoader.cpp | 2 + src/Interpreters/InterpreterCreateQuery.cpp | 3 +- src/Interpreters/InterpreterCreateQuery.h | 6 - src/Interpreters/loadMetadata.cpp | 17 +- src/Interpreters/loadMetadata.h | 3 + 16 files changed, 230 insertions(+), 90 deletions(-) diff 
--git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index 2b1b6185321..284fbc9f66c 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -303,6 +303,7 @@ try loadMetadataSystem(global_context); attachSystemTables(global_context); loadMetadata(global_context); + startupSystemTables(); DatabaseCatalog::instance().loadDatabases(); LOG_DEBUG(log, "Loaded metadata."); } diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index ddbc4c4e433..1371d36ce5c 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -1127,6 +1127,7 @@ if (ThreadFuzzer::instance().isEffective()) attachSystemTablesServer(*database_catalog.getSystemDatabase(), has_zookeeper); /// Then, load remaining databases loadMetadata(global_context, default_database); + startupSystemTables(); database_catalog.loadDatabases(); /// After loading validate that default database exists database_catalog.assertDatabaseExists(default_database); diff --git a/src/Databases/DDLDependencyVisitor.cpp b/src/Databases/DDLDependencyVisitor.cpp index 7408e74f012..e11d4739604 100644 --- a/src/Databases/DDLDependencyVisitor.cpp +++ b/src/Databases/DDLDependencyVisitor.cpp @@ -78,7 +78,7 @@ void DDLDependencyVisitor::extractTableNameFromArgument(const ASTFunction & func if (database_name.empty()) database_name = data.default_database; - data.dependencies.push_back(QualifiedTableName{std::move(database_name), std::move(table_name)}); + data.dependencies.emplace(QualifiedTableName{std::move(database_name), std::move(table_name)}); } } diff --git a/src/Databases/DDLDependencyVisitor.h b/src/Databases/DDLDependencyVisitor.h index 708e0bca66e..43dbbef9e25 100644 --- a/src/Databases/DDLDependencyVisitor.h +++ b/src/Databases/DDLDependencyVisitor.h @@ -14,9 +14,9 @@ class DDLDependencyVisitor public: struct Data { - using TableDependencies = std::vector; + using TableNamesSet = std::set; String default_database; - TableDependencies dependencies; + TableNamesSet dependencies; }; using Visitor = ConstInDepthNodeVisitor; diff --git a/src/Databases/DatabaseOnDisk.cpp b/src/Databases/DatabaseOnDisk.cpp index dad059a2008..620e560b64c 100644 --- a/src/Databases/DatabaseOnDisk.cpp +++ b/src/Databases/DatabaseOnDisk.cpp @@ -608,7 +608,7 @@ void DatabaseOnDisk::iterateMetadataFiles(ContextPtr local_context, const Iterat } /// Read and parse metadata in parallel - ThreadPool pool{1}; + ThreadPool pool; for (const auto & file : metadata_files) { pool.scheduleOrThrowOnError([&]() diff --git a/src/Databases/DatabaseOrdinary.cpp b/src/Databases/DatabaseOrdinary.cpp index 567bf8726e3..77bde83aa65 100644 --- a/src/Databases/DatabaseOrdinary.cpp +++ b/src/Databases/DatabaseOrdinary.cpp @@ -190,9 +190,17 @@ void DatabaseOrdinary::loadTablesMetadata(ContextPtr local_context, ParsedTables std::lock_guard lock{metadata.mutex}; metadata.metadata[qualified_name] = std::make_pair(full_path.string(), std::move(ast)); if (data.dependencies.empty()) - metadata.independent_tables.insert(std::move(qualified_name)); + { + metadata.independent_tables.emplace_back(std::move(qualified_name)); + } else - metadata.table_dependencies.insert({std::move(qualified_name), std::move(data.dependencies)}); + { + for (const auto & dependency : data.dependencies) + { + metadata.dependencies_info[dependency].dependent_tables.push_back(qualified_name); + ++metadata.dependencies_info[qualified_name].dependencies_count; + } + } metadata.total_dictionaries += create_query->is_dictionary; } } diff --git 
a/src/Databases/MySQL/DatabaseMaterializedMySQL.cpp b/src/Databases/MySQL/DatabaseMaterializedMySQL.cpp index 87ec461026e..2b4649c275a 100644 --- a/src/Databases/MySQL/DatabaseMaterializedMySQL.cpp +++ b/src/Databases/MySQL/DatabaseMaterializedMySQL.cpp @@ -96,7 +96,7 @@ void DatabaseMaterializedMySQL::setException(const std::exception_ptr & ex template void DatabaseMaterializedMySQL::startupTables(ThreadPool & thread_pool, bool force_restore, bool force_attach) { - Base::startupTables(thread_pool, force_attach, force_restore); + Base::startupTables(thread_pool, force_restore, force_attach); if (!force_attach) materialize_thread.assertMySQLAvailable(); diff --git a/src/Databases/TablesLoader.cpp b/src/Databases/TablesLoader.cpp index ded359790cb..4aa8f422043 100644 --- a/src/Databases/TablesLoader.cpp +++ b/src/Databases/TablesLoader.cpp @@ -2,10 +2,11 @@ #include #include #include +#include #include #include -#include #include +#include namespace DB { @@ -42,94 +43,146 @@ TablesLoader::TablesLoader(ContextMutablePtr global_context_, Databases database void TablesLoader::loadTables() { bool need_resolve_dependencies = !global_context->getConfigRef().has("ignore_table_dependencies_on_metadata_loading"); + + /// Load all Lazy, MySQl, PostgreSQL, SQLite, etc databases first. for (auto & database : databases) { - if (need_resolve_dependencies && database->supportsLoadingInTopologicalOrder()) - databases_to_load.emplace(database->getDatabaseName(), database); + if (need_resolve_dependencies && database.second->supportsLoadingInTopologicalOrder()) + databases_to_load.push_back(database.first); else - database->loadStoredObjects(global_context, force_restore, force_attach, true); + database.second->loadStoredObjects(global_context, force_restore, force_attach, true); } - for (auto & database : databases_to_load) + /// Read and parse metadata from Ordinary, Atomic, Materialized*, Replicated, etc databases. Build dependency graph. + for (auto & database_name : databases_to_load) { - database.second->beforeLoadingMetadata(global_context, force_restore, force_attach); - database.second->loadTablesMetadata(global_context, all_tables); + databases[database_name]->beforeLoadingMetadata(global_context, force_restore, force_attach); + databases[database_name]->loadTablesMetadata(global_context, all_tables); } - auto table_does_not_exist = [this](const QualifiedTableName & table_name, const QualifiedTableName & dependency_name) + LOG_INFO(log, "Parsed metadata of {} tables in {} sec", all_tables.metadata.size(), stopwatch.elapsedSeconds()); + stopwatch.restart(); + + logDependencyGraph(); + + /// Some tables were loaded by database with loadStoredObjects(...). Remove them from graph if necessary. + removeUnresolvableDependencies(); + + loadTablesInTopologicalOrder(pool); +} + +void TablesLoader::startupTables() +{ + /// Startup tables after all tables are loaded. Background tasks (merges, mutations, etc) may slow down data parts loading. 
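+    /// The resulting call order at the call sites added in this patch (see the
+    /// loadMetadata.cpp and InterpreterCreateQuery.cpp hunks below) is:
+    ///     TablesLoader loader{context, databases, force_restore, force_attach};
+    ///     loader.loadTables();    /// parse metadata, resolve dependencies, load tables
+    ///     loader.startupTables(); /// start background tasks only after everything is loaded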
+ for (auto & database : databases) + database.second->startupTables(pool, force_restore, force_attach); +} + + +void TablesLoader::removeUnresolvableDependencies() +{ + auto need_exclude_dependency = [this](const QualifiedTableName & dependency_name, const DependenciesInfo & info) { + /// Table exists and will be loaded if (all_tables.metadata.contains(dependency_name)) return false; + /// Table exists and it's already loaded if (DatabaseCatalog::instance().isTableExist(StorageID(dependency_name.database, dependency_name.table), global_context)) return false; - /// FIXME if XML dict + /// It's XML dictionary. It was loaded before tables and DDL dictionaries. + if (dependency_name.database == all_tables.default_database && + global_context->getExternalDictionariesLoader().has(dependency_name.table)) + return false; + + /// Some tables depends on table "dependency_name", but there is no such table in DatabaseCatalog and we don't have its metadata. + /// We will ignore it and try to load dependent tables without "dependency_name" + /// (but most likely dependent tables will fail to load). + LOG_WARNING(log, "Tables {} depend on {}, but seems like the it does not exist. Will ignore it and try to load existing tables", + fmt::join(info.dependent_tables, ", "), dependency_name); + + if (info.dependencies_count) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Table {} does not exist, but we have seen its AST and found {} dependencies." + "It's a bug", dependency_name, info.dependencies_count); + if (info.dependent_tables.empty()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Table {} does not have dependencies and dependent tables as it expected to." + "It's a bug", dependency_name); - LOG_WARNING(log, "Table {} depends on {}, but seems like the second one does not exist", table_name, dependency_name); return true; }; - removeDependencies(table_does_not_exist, all_tables.independent_tables); - - //LOG_TRACE(log, "Independent database objects: {}", fmt::join(all_tables.independent_tables, ", ")); - //for (const auto & dependencies : all_tables.table_dependencies) - // LOG_TRACE(log, "Database object {} depends on: {}", dependencies.first, fmt::join(dependencies.second, ", ")); - - auto is_dependency_loaded = [this](const QualifiedTableName & /*table_name*/, const QualifiedTableName & dependency_name) + auto table_it = all_tables.dependencies_info.begin(); + while (table_it != all_tables.dependencies_info.end()) { - return all_tables.independent_tables.contains(dependency_name); - }; + auto & info = table_it->second; + if (need_exclude_dependency(table_it->first, info)) + table_it = removeResolvedDependency(table_it, all_tables.independent_tables); + else + ++table_it; + } +} - AtomicStopwatch watch; - ThreadPool pool; +void TablesLoader::loadTablesInTopologicalOrder(ThreadPool & pool) +{ + /// While we have some independent tables to load, load them in parallel. + /// Then remove independent tables from graph and find new ones. 
size_t level = 0; do { - assert(all_tables.metadata.size() == tables_processed + all_tables.independent_tables.size() + all_tables.table_dependencies.size()); - startLoadingIndependentTables(pool, watch, level); - std::unordered_set new_independent_tables; - removeDependencies(is_dependency_loaded, new_independent_tables); + assert(all_tables.metadata.size() == tables_processed + all_tables.independent_tables.size() + getNumberOfTablesWithDependencies()); + logDependencyGraph(); + + startLoadingIndependentTables(pool, level); + + TableNames new_independent_tables; + for (const auto & table_name : all_tables.independent_tables) + { + auto info_it = all_tables.dependencies_info.find(table_name); + if (info_it == all_tables.dependencies_info.end()) + { + /// No tables depend on table_name and it was not even added to dependencies_info + continue; + } + removeResolvedDependency(info_it, new_independent_tables); + } + pool.wait(); + all_tables.independent_tables = std::move(new_independent_tables); - checkCyclicDependencies(); ++level; - assert(all_tables.metadata.size() == tables_processed + all_tables.independent_tables.size() + all_tables.table_dependencies.size()); } while (!all_tables.independent_tables.empty()); - for (auto & database : databases_to_load) - { - database.second->startupTables(pool, force_restore, force_attach); - } + checkCyclicDependencies(); } -void TablesLoader::removeDependencies(RemoveDependencyPredicate need_remove_dependency, std::unordered_set & independent_tables) +DependenciesInfosIter TablesLoader::removeResolvedDependency(const DependenciesInfosIter & info_it, TableNames & independent_tables) { - auto table_it = all_tables.table_dependencies.begin(); - while (table_it != all_tables.table_dependencies.end()) - { - auto & dependencies = table_it->second; - assert(!dependencies.empty()); - auto dependency_it = dependencies.begin(); - while (dependency_it != dependencies.end()) - { - if (need_remove_dependency(table_it->first, *dependency_it)) - dependency_it = dependencies.erase(dependency_it); - else - ++dependency_it; - } + auto & info = info_it->second; + if (info.dependencies_count) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Table {} is in list of independent tables, but dependencies count is {}." + "It's a bug", info_it->first, info.dependencies_count); + if (info.dependent_tables.empty()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Table {} does not have dependent tables. It's a bug", info_it->first); - if (dependencies.empty()) + /// Decrement number of dependencies for each dependent table + for (auto & dependent_table : info.dependent_tables) + { + auto & dependent_info = all_tables.dependencies_info[dependent_table]; + auto & dependencies_count = dependent_info.dependencies_count; + if (dependencies_count == 0) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Trying to decrement 0 dependencies counter for {}. 
It's a bug", dependent_table); + --dependencies_count; + if (dependencies_count == 0) { - independent_tables.emplace(std::move(table_it->first)); - table_it = all_tables.table_dependencies.erase(table_it); - } - else - { - ++table_it; + independent_tables.push_back(dependent_table); + if (dependent_info.dependent_tables.empty()) + all_tables.dependencies_info.erase(dependent_table); } } + + return all_tables.dependencies_info.erase(info_it); } -void TablesLoader::startLoadingIndependentTables(ThreadPool & pool, AtomicStopwatch & watch, size_t level) +void TablesLoader::startLoadingIndependentTables(ThreadPool & pool, size_t level) { size_t total_tables = all_tables.metadata.size(); @@ -137,32 +190,56 @@ void TablesLoader::startLoadingIndependentTables(ThreadPool & pool, AtomicStopwa for (const auto & table_name : all_tables.independent_tables) { - pool.scheduleOrThrowOnError([this, total_tables, &table_name, &watch]() + pool.scheduleOrThrowOnError([this, total_tables, &table_name]() { const auto & path_and_query = all_tables.metadata[table_name]; const auto & path = path_and_query.first; const auto & ast = path_and_query.second; - databases_to_load[table_name.database]->loadTableFromMetadata(global_context, path, table_name, ast, force_restore); - logAboutProgress(log, ++tables_processed, total_tables, watch); + databases[table_name.database]->loadTableFromMetadata(global_context, path, table_name, ast, force_restore); + logAboutProgress(log, ++tables_processed, total_tables, stopwatch); }); } } +size_t TablesLoader::getNumberOfTablesWithDependencies() const +{ + size_t number_of_tables_with_dependencies = 0; + for (const auto & info : all_tables.dependencies_info) + if (info.second.dependencies_count) + ++number_of_tables_with_dependencies; + return number_of_tables_with_dependencies; +} + void TablesLoader::checkCyclicDependencies() const { - if (!all_tables.independent_tables.empty()) - return; - if (all_tables.table_dependencies.empty()) + /// Loading is finished if all dependencies are resolved + if (all_tables.dependencies_info.empty()) return; - for (const auto & dependencies : all_tables.table_dependencies) + for (const auto & info : all_tables.dependencies_info) { - LOG_WARNING(log, "Cannot resolve dependencies: Table {} depends on {}", dependencies.first, fmt::join(dependencies.second, ", ")); + LOG_WARNING(log, "Cannot resolve dependencies: Table {} have {} dependencies and {} dependent tables. List of dependent tables: {}", + info.first, info.second.dependencies_count, + info.second.dependent_tables.size(), fmt::join(info.second.dependent_tables, ", ")); + assert(info.second.dependencies_count == 0); } throw Exception(ErrorCodes::INFINITE_LOOP, "Cannot attach {} tables due to cyclic dependencies. " - "See server log for details.", all_tables.table_dependencies.size()); + "See server log for details.", all_tables.dependencies_info.size()); +} + +void TablesLoader::logDependencyGraph() const +{ + LOG_TRACE(log, "Have {} independent tables: {}", all_tables.independent_tables.size(), fmt::join(all_tables.independent_tables, ", ")); + for (const auto & dependencies : all_tables.dependencies_info) + { + LOG_TRACE(log, + "Table {} have {} dependencies and {} dependent tables. 
List of dependent tables: {}", + dependencies.first, + dependencies.second.dependencies_count, + dependencies.second.dependent_tables.size(), + fmt::join(dependencies.second.dependent_tables, ", ")); + } } } - diff --git a/src/Databases/TablesLoader.h b/src/Databases/TablesLoader.h index fface310bb6..35dae8a5ad6 100644 --- a/src/Databases/TablesLoader.h +++ b/src/Databases/TablesLoader.h @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -19,32 +20,59 @@ class AtomicStopwatch; namespace DB { +void logAboutProgress(Poco::Logger * log, size_t processed, size_t total, AtomicStopwatch & watch); + + class IDatabase; using DatabasePtr = std::shared_ptr; -void logAboutProgress(Poco::Logger * log, size_t processed, size_t total, AtomicStopwatch & watch); +using ParsedMetadata = std::map>; +using TableNames = std::vector; + +struct DependenciesInfo +{ + /// How many dependencies this table have + size_t dependencies_count = 0; + /// List of tables which depend on this table + TableNames dependent_tables; +}; + +using DependenciesInfos = std::unordered_map; +using DependenciesInfosIter = std::unordered_map::iterator; struct ParsedTablesMetadata { String default_database; - using ParsedMetadata = std::map>; std::mutex mutex; ParsedMetadata metadata; + + /// For logging size_t total_dictionaries = 0; - std::unordered_set independent_tables; - std::unordered_map> table_dependencies; + + /// List of tables that do not have any dependencies and can be loaded + TableNames independent_tables; + + /// Actually it contains two different maps (with, probably, intersecting keys): + /// 1. table name -> number of dependencies + /// 2. table name -> dependent tables list (adjacency list of dependencies graph). + /// If table A depends on table B, then there is an edge B --> A, i.e. dependencies_info[B].dependent_tables contains A. + /// And dependencies_info[C].dependencies_count is a number of incoming edges for vertex C (how many tables we have to load before C). + DependenciesInfos dependencies_info; }; +/// Loads tables (and dictionaries) from specified databases +/// taking into account dependencies between them. 
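In effect the loader runs Kahn's topological sort over this graph. A minimal self-contained sketch of the counting scheme described above, with simplified names (QualifiedTableName reduced to a string; no thread pool, databases or error handling):

#include <iostream>
#include <string>
#include <unordered_map>
#include <vector>

struct Info
{
    size_t dependencies_count = 0;              /// incoming edges: tables that must load first
    std::vector<std::string> dependent_tables;  /// outgoing edges: tables waiting for this one
};

int main()
{
    /// B and C depend on A; D depends on B and C.
    std::unordered_map<std::string, Info> info;
    auto add_edge = [&](const std::string & from, const std::string & to)
    {
        info[from].dependent_tables.push_back(to);
        ++info[to].dependencies_count;
    };
    add_edge("A", "B");
    add_edge("A", "C");
    add_edge("B", "D");
    add_edge("C", "D");

    std::vector<std::string> independent{"A"};  /// no incoming edges, can be loaded right away
    while (!independent.empty())
    {
        std::vector<std::string> next;
        for (const auto & name : independent)   /// one level; loaded in parallel in the real code
        {
            std::cout << "load " << name << '\n';
            for (const auto & dependent : info[name].dependent_tables)
                if (--info[dependent].dependencies_count == 0)
                    next.push_back(dependent);  /// all of its dependencies are now loaded
        }
        independent = std::move(next);
    }
    /// Prints: load A, load B, load C, load D.
}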
class TablesLoader { public: - - using Databases = std::vector; + using Databases = std::map; TablesLoader(ContextMutablePtr global_context_, Databases databases_, bool force_restore_ = false, bool force_attach_ = false); + TablesLoader() = delete; void loadTables(); + void startupTables(); private: ContextMutablePtr global_context; @@ -52,19 +80,27 @@ private: bool force_restore; bool force_attach; - std::map databases_to_load; + Strings databases_to_load; ParsedTablesMetadata all_tables; Poco::Logger * log; std::atomic tables_processed{0}; + AtomicStopwatch stopwatch; + ThreadPool pool; - using RemoveDependencyPredicate = std::function; - void removeDependencies(RemoveDependencyPredicate need_remove_dependency, std::unordered_set & independent_tables); + void removeUnresolvableDependencies(); - void startLoadingIndependentTables(ThreadPool & pool, AtomicStopwatch & watch, size_t level); + void loadTablesInTopologicalOrder(ThreadPool & pool); + + DependenciesInfosIter removeResolvedDependency(const DependenciesInfosIter & info_it, TableNames & independent_tables); + + void startLoadingIndependentTables(ThreadPool & pool, size_t level); void checkCyclicDependencies() const; + size_t getNumberOfTablesWithDependencies() const; + + void logDependencyGraph() const; }; } diff --git a/src/Interpreters/ExternalDictionariesLoader.cpp b/src/Interpreters/ExternalDictionariesLoader.cpp index cbb0e52b91b..2cbcf9e362c 100644 --- a/src/Interpreters/ExternalDictionariesLoader.cpp +++ b/src/Interpreters/ExternalDictionariesLoader.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #if !defined(ARCADIA_BUILD) # include "config_core.h" @@ -30,6 +31,7 @@ ExternalDictionariesLoader::ExternalDictionariesLoader(ContextPtr global_context setConfigSettings({"dictionary", "name", "database", "uuid"}); enableAsyncLoading(true); enablePeriodicUpdates(true); + log = &Poco::Logger::get("EDL"); } ExternalLoader::LoadablePtr ExternalDictionariesLoader::create( @@ -89,12 +91,14 @@ DictionaryStructure ExternalDictionariesLoader::getDictionaryStructure(const std std::string ExternalDictionariesLoader::resolveDictionaryName(const std::string & dictionary_name, const std::string & current_database_name) const { + LOG_INFO(log, "Looking for {} ({})", dictionary_name, current_database_name); bool has_dictionary = has(dictionary_name); if (has_dictionary) return dictionary_name; std::string resolved_name = resolveDictionaryNameFromDatabaseCatalog(dictionary_name); has_dictionary = has(resolved_name); + LOG_INFO(log, "Got resolved name {}, {}", resolved_name, has_dictionary); if (!has_dictionary) { diff --git a/src/Interpreters/ExternalDictionariesLoader.h b/src/Interpreters/ExternalDictionariesLoader.h index 06f64ef30c5..3e698cb5d66 100644 --- a/src/Interpreters/ExternalDictionariesLoader.h +++ b/src/Interpreters/ExternalDictionariesLoader.h @@ -4,6 +4,7 @@ #include #include #include +#include #include @@ -46,6 +47,7 @@ protected: friend class StorageSystemDictionaries; friend class DatabaseDictionary; + Poco::Logger * log; }; } diff --git a/src/Interpreters/ExternalLoader.cpp b/src/Interpreters/ExternalLoader.cpp index eb7824a1124..10051026f6a 100644 --- a/src/Interpreters/ExternalLoader.cpp +++ b/src/Interpreters/ExternalLoader.cpp @@ -14,6 +14,7 @@ #include #include #include +#include namespace CurrentStatusInfo @@ -467,6 +468,7 @@ public: if (infos.find(name) == infos.end()) { Info & info = infos.emplace(name, Info{name, config}).first->second; + LOG_TRACE(log, "Inserted {} into infos", name); if 
(always_load_everything) { LOG_TRACE(log, "Will load '{}' because always_load_everything flag is set.", name); diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index d885b9a2ac5..db4b8a72a7d 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -275,8 +275,9 @@ BlockIO InterpreterCreateQuery::createDatabase(ASTCreateQuery & create) if (!load_database_without_tables) { /// We use global context here, because storages lifetime is bigger than query context lifetime - TablesLoader loader{getContext()->getGlobalContext(), {database}, has_force_restore_data_flag, create.attach && force_attach}; //-V560 + TablesLoader loader{getContext()->getGlobalContext(), {{database_name, database}}, has_force_restore_data_flag, create.attach && force_attach}; //-V560 loader.loadTables(); + loader.startupTables(); } } catch (...) diff --git a/src/Interpreters/InterpreterCreateQuery.h b/src/Interpreters/InterpreterCreateQuery.h index 47d0e2f492d..89d27a30555 100644 --- a/src/Interpreters/InterpreterCreateQuery.h +++ b/src/Interpreters/InterpreterCreateQuery.h @@ -52,11 +52,6 @@ public: force_attach = force_attach_; } - void setSkipStartupTables(bool skip_startup_tables_) - { - skip_startup_tables = skip_startup_tables_; - } - void setLoadDatabaseWithoutTables(bool load_database_without_tables_) { load_database_without_tables = load_database_without_tables_; @@ -104,7 +99,6 @@ private: /// Is this an internal query - not from the user. bool internal = false; bool force_attach = false; - bool skip_startup_tables = false; bool load_database_without_tables = false; mutable String as_database_saved; diff --git a/src/Interpreters/loadMetadata.cpp b/src/Interpreters/loadMetadata.cpp index e4c73e7d4e5..a6563553470 100644 --- a/src/Interpreters/loadMetadata.cpp +++ b/src/Interpreters/loadMetadata.cpp @@ -44,8 +44,7 @@ static void executeCreateQuery( interpreter.setInternal(true); interpreter.setForceAttach(true); interpreter.setForceRestoreData(has_force_restore_data_flag); - interpreter.setSkipStartupTables(true); - interpreter.setLoadDatabaseWithoutTables(database != DatabaseCatalog::SYSTEM_DATABASE); + interpreter.setLoadDatabaseWithoutTables(true); interpreter.execute(); } @@ -161,11 +160,12 @@ void loadMetadata(ContextMutablePtr context, const String & default_database_nam for (const auto & [name, db_path] : databases) { loadDatabase(context, name, db_path, has_force_restore_data_flag); - loaded_databases.emplace_back(DatabaseCatalog::instance().getDatabase(name)); + loaded_databases.insert({name, DatabaseCatalog::instance().getDatabase(name)}); } TablesLoader loader{context, std::move(loaded_databases), has_force_restore_data_flag, /* force_attach */ true}; loader.loadTables(); + loader.startupTables(); if (has_force_restore_data_flag) { @@ -199,6 +199,17 @@ void loadMetadataSystem(ContextMutablePtr context) executeCreateQuery(database_create_query, context, DatabaseCatalog::SYSTEM_DATABASE, "", true); } + TablesLoader loader{context, {{DatabaseCatalog::SYSTEM_DATABASE, DatabaseCatalog::instance().getSystemDatabase()}}, + /* force_restore */true, /* force_attach */ true}; + loader.loadTables(); + /// Will startup tables in system database after all databases are loaded. 
+} + + +void startupSystemTables() +{ + ThreadPool pool; + DatabaseCatalog::instance().getSystemDatabase()->startupTables(pool, /* force_restore */true, /* force_attach */ true); } } diff --git a/src/Interpreters/loadMetadata.h b/src/Interpreters/loadMetadata.h index cf038a42855..9ff4432b464 100644 --- a/src/Interpreters/loadMetadata.h +++ b/src/Interpreters/loadMetadata.h @@ -1,6 +1,7 @@ #pragma once #include +#include namespace DB @@ -13,4 +14,6 @@ void loadMetadataSystem(ContextMutablePtr context); /// Load tables from databases and add them to context. Database 'system' is ignored. Use separate function to load system tables. void loadMetadata(ContextMutablePtr context, const String & default_database_name = {}); +void startupSystemTables(); + } From 666a3aee99407e836a9c11fe8f09f6ba40b4e225 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Thu, 2 Sep 2021 16:34:46 +0300 Subject: [PATCH 18/80] add another test --- src/Databases/DDLDependencyVisitor.cpp | 30 +++++++++++++ src/Databases/DDLDependencyVisitor.h | 4 ++ src/Databases/DatabaseOrdinary.cpp | 2 + src/Databases/IDatabase.h | 3 +- src/Databases/TablesLoader.cpp | 8 ++-- .../ExternalDictionariesLoader.cpp | 4 -- src/Interpreters/ExternalDictionariesLoader.h | 2 - src/Interpreters/ExternalLoader.cpp | 2 - .../test_dictionaries_dependency_xml/test.py | 44 ++++++++++++++++++- 9 files changed, 85 insertions(+), 14 deletions(-) diff --git a/src/Databases/DDLDependencyVisitor.cpp b/src/Databases/DDLDependencyVisitor.cpp index e11d4739604..a5f2ce995a7 100644 --- a/src/Databases/DDLDependencyVisitor.cpp +++ b/src/Databases/DDLDependencyVisitor.cpp @@ -3,6 +3,9 @@ #include #include #include +#include +#include +#include namespace DB { @@ -11,6 +14,8 @@ void DDLDependencyVisitor::visit(const ASTPtr & ast, Data & data) { if (const auto * function = ast->as()) visit(*function, data); + else if (const auto * dict_source = ast->as()) + visit(*dict_source, data); } bool DDLDependencyVisitor::needChildVisit(const ASTPtr & node, const ASTPtr & /*child*/) @@ -29,6 +34,31 @@ void DDLDependencyVisitor::visit(const ASTFunction & function, Data & data) } } +void DDLDependencyVisitor::visit(const ASTFunctionWithKeyValueArguments & dict_source, Data & data) +{ + if (dict_source.name != "clickhouse") + return; + if (!dict_source.elements) + return; + + auto config = getDictionaryConfigurationFromAST(data.create_query->as(), data.global_context); + String host = config->getString("dictionary.source.clickhouse.host", ""); + UInt16 port = config->getUInt("dictionary.source.clickhouse.port", 0); + String database = config->getString("dictionary.source.clickhouse.db", ""); + String table = config->getString("dictionary.source.clickhouse.table", ""); + bool secure = config->getBool("dictionary.source.clickhouse.secure", false); + if (host.empty() || port == 0 || table.empty()) + return; + UInt16 default_port = secure ? 
data.global_context->getTCPPortSecure().value_or(0) : data.global_context->getTCPPort(); + if (!isLocalAddress({host, port}, default_port)) + return; + + if (database.empty()) + database = data.default_database; + data.dependencies.emplace(QualifiedTableName{std::move(database), std::move(table)}); +} + + void DDLDependencyVisitor::extractTableNameFromArgument(const ASTFunction & function, Data & data, size_t arg_idx) { /// Just ignore incorrect arguments, proper exception will be thrown later diff --git a/src/Databases/DDLDependencyVisitor.h b/src/Databases/DDLDependencyVisitor.h index 43dbbef9e25..1d26adb6e6d 100644 --- a/src/Databases/DDLDependencyVisitor.h +++ b/src/Databases/DDLDependencyVisitor.h @@ -7,6 +7,7 @@ namespace DB { class ASTFunction; +class ASTFunctionWithKeyValueArguments; class DDLDependencyVisitor @@ -17,6 +18,8 @@ public: using TableNamesSet = std::set; String default_database; TableNamesSet dependencies; + ContextPtr global_context; + ASTPtr create_query; }; using Visitor = ConstInDepthNodeVisitor; @@ -26,6 +29,7 @@ public: private: static void visit(const ASTFunction & function, Data & data); + static void visit(const ASTFunctionWithKeyValueArguments & dict_source, Data & data); static void extractTableNameFromArgument(const ASTFunction & function, Data & data, size_t arg_idx); }; diff --git a/src/Databases/DatabaseOrdinary.cpp b/src/Databases/DatabaseOrdinary.cpp index 77bde83aa65..4c73d3c30ff 100644 --- a/src/Databases/DatabaseOrdinary.cpp +++ b/src/Databases/DatabaseOrdinary.cpp @@ -183,6 +183,8 @@ void DatabaseOrdinary::loadTablesMetadata(ContextPtr local_context, ParsedTables TableLoadingDependenciesVisitor::Data data; data.default_database = metadata.default_database; + data.create_query = ast; + data.global_context = getContext(); TableLoadingDependenciesVisitor visitor{data}; visitor.visit(ast); QualifiedTableName qualified_name{database_name, create_query->table}; diff --git a/src/Databases/IDatabase.h b/src/Databases/IDatabase.h index dc8c24e0bcc..fe17312cc0b 100644 --- a/src/Databases/IDatabase.h +++ b/src/Databases/IDatabase.h @@ -8,8 +8,6 @@ #include #include -#include //FIXME - #include #include #include @@ -29,6 +27,7 @@ struct StorageID; class ASTCreateQuery; using DictionariesWithID = std::vector>; struct ParsedTablesMetadata; +struct QualifiedTableName; namespace ErrorCodes { diff --git a/src/Databases/TablesLoader.cpp b/src/Databases/TablesLoader.cpp index 4aa8f422043..30a9bdd324e 100644 --- a/src/Databases/TablesLoader.cpp +++ b/src/Databases/TablesLoader.cpp @@ -14,6 +14,7 @@ namespace DB namespace ErrorCodes { extern const int INFINITE_LOOP; + extern const int LOGICAL_ERROR; } static constexpr size_t PRINT_MESSAGE_EACH_N_OBJECTS = 256; @@ -60,7 +61,8 @@ void TablesLoader::loadTables() databases[database_name]->loadTablesMetadata(global_context, all_tables); } - LOG_INFO(log, "Parsed metadata of {} tables in {} sec", all_tables.metadata.size(), stopwatch.elapsedSeconds()); + LOG_INFO(log, "Parsed metadata of {} tables in {} databases in {} sec", + all_tables.metadata.size(), databases_to_load.size(), stopwatch.elapsedSeconds()); stopwatch.restart(); logDependencyGraph(); @@ -88,11 +90,11 @@ void TablesLoader::removeUnresolvableDependencies() return false; /// Table exists and it's already loaded if (DatabaseCatalog::instance().isTableExist(StorageID(dependency_name.database, dependency_name.table), global_context)) - return false; + return true; /// It's XML dictionary. It was loaded before tables and DDL dictionaries. 
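/// (Note the polarity: this predicate decides whether the dependency can be *excluded*
/// from the graph, so dependencies that are already satisfied - the existing table above
/// and the XML dictionary below - must return true; the flipped returns in this hunk fix
/// cases where such dependencies would have blocked their dependent tables forever.)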
if (dependency_name.database == all_tables.default_database && global_context->getExternalDictionariesLoader().has(dependency_name.table)) - return false; + return true; /// Some tables depends on table "dependency_name", but there is no such table in DatabaseCatalog and we don't have its metadata. /// We will ignore it and try to load dependent tables without "dependency_name" diff --git a/src/Interpreters/ExternalDictionariesLoader.cpp b/src/Interpreters/ExternalDictionariesLoader.cpp index 2cbcf9e362c..cbb0e52b91b 100644 --- a/src/Interpreters/ExternalDictionariesLoader.cpp +++ b/src/Interpreters/ExternalDictionariesLoader.cpp @@ -5,7 +5,6 @@ #include #include #include -#include #if !defined(ARCADIA_BUILD) # include "config_core.h" @@ -31,7 +30,6 @@ ExternalDictionariesLoader::ExternalDictionariesLoader(ContextPtr global_context setConfigSettings({"dictionary", "name", "database", "uuid"}); enableAsyncLoading(true); enablePeriodicUpdates(true); - log = &Poco::Logger::get("EDL"); } ExternalLoader::LoadablePtr ExternalDictionariesLoader::create( @@ -91,14 +89,12 @@ DictionaryStructure ExternalDictionariesLoader::getDictionaryStructure(const std std::string ExternalDictionariesLoader::resolveDictionaryName(const std::string & dictionary_name, const std::string & current_database_name) const { - LOG_INFO(log, "Looking for {} ({})", dictionary_name, current_database_name); bool has_dictionary = has(dictionary_name); if (has_dictionary) return dictionary_name; std::string resolved_name = resolveDictionaryNameFromDatabaseCatalog(dictionary_name); has_dictionary = has(resolved_name); - LOG_INFO(log, "Got resolved name {}, {}", resolved_name, has_dictionary); if (!has_dictionary) { diff --git a/src/Interpreters/ExternalDictionariesLoader.h b/src/Interpreters/ExternalDictionariesLoader.h index 3e698cb5d66..06f64ef30c5 100644 --- a/src/Interpreters/ExternalDictionariesLoader.h +++ b/src/Interpreters/ExternalDictionariesLoader.h @@ -4,7 +4,6 @@ #include #include #include -#include #include @@ -47,7 +46,6 @@ protected: friend class StorageSystemDictionaries; friend class DatabaseDictionary; - Poco::Logger * log; }; } diff --git a/src/Interpreters/ExternalLoader.cpp b/src/Interpreters/ExternalLoader.cpp index 10051026f6a..eb7824a1124 100644 --- a/src/Interpreters/ExternalLoader.cpp +++ b/src/Interpreters/ExternalLoader.cpp @@ -14,7 +14,6 @@ #include #include #include -#include namespace CurrentStatusInfo @@ -468,7 +467,6 @@ public: if (infos.find(name) == infos.end()) { Info & info = infos.emplace(name, Info{name, config}).first->second; - LOG_TRACE(log, "Inserted {} into infos", name); if (always_load_everything) { LOG_TRACE(log, "Will load '{}' because always_load_everything flag is set.", name); diff --git a/tests/integration/test_dictionaries_dependency_xml/test.py b/tests/integration/test_dictionaries_dependency_xml/test.py index cfd7d58d574..849fdf57980 100644 --- a/tests/integration/test_dictionaries_dependency_xml/test.py +++ b/tests/integration/test_dictionaries_dependency_xml/test.py @@ -6,7 +6,7 @@ DICTIONARY_FILES = ['configs/dictionaries/dep_x.xml', 'configs/dictionaries/dep_ 'configs/dictionaries/dep_z.xml'] cluster = ClickHouseCluster(__file__) -instance = cluster.add_instance('instance', dictionaries=DICTIONARY_FILES) +instance = cluster.add_instance('instance', dictionaries=DICTIONARY_FILES, stay_alive=True) @pytest.fixture(scope="module") @@ -73,3 +73,45 @@ def test_get_data(started_cluster): assert query("SELECT dictGetString('dep_x', 'a', toUInt64(4))") == "ether\n" assert 
query("SELECT dictGetString('dep_y', 'a', toUInt64(4))") == "ether\n" assert query("SELECT dictGetString('dep_z', 'a', toUInt64(4))") == "ether\n" + +def dependent_tables_assert(): + res = instance.query("select database || '.' || name from system.tables") + assert "system.join" in res + assert "default.src" in res + assert "dict.dep_y" in res + assert "lazy.log" in res + assert "test.d" in res + assert "default.join" in res + assert "a.t" in res + +def test_dependent_tables(started_cluster): + query = instance.query + query("create database lazy engine=Lazy(10)") + query("create database a") + query("create table lazy.src (n int, m int) engine=Log") + query("create dictionary a.d (n int default 0, m int default 42) primary key n " + "source(clickhouse(host 'localhost' port tcpPort() user 'default' table 'src' password '' db 'lazy'))" + "lifetime(min 1 max 10) layout(flat())") + query("create table system.join (n int, m int) engine=Join(any, left, n)") + query("insert into system.join values (1, 1)") + query("create table src (n int, m default joinGet('system.join', 'm', 1::int)," + "t default dictGetOrNull('a.d', 'm', toUInt64(3))," + "k default dictGet('a.d', 'm', toUInt64(4))) engine=MergeTree order by n") + query("create dictionary test.d (n int default 0, m int default 42) primary key n " + "source(clickhouse(host 'localhost' port tcpPort() user 'default' table 'src' password '' db 'default'))" + "lifetime(min 1 max 10) layout(flat())") + query("create table join (n int, m default dictGet('a.d', 'm', toUInt64(3))," + "k default dictGet('test.d', 'm', toUInt64(0))) engine=Join(any, left, n)") + query("create table lazy.log (n default dictGet(test.d, 'm', toUInt64(0))) engine=Log") + query("create table a.t (n default joinGet('system.join', 'm', 1::int)," + "m default dictGet('test.d', 'm', toUInt64(3))," + "k default joinGet(join, 'm', 1::int)) engine=MergeTree order by n") + + dependent_tables_assert() + instance.restart_clickhouse() + dependent_tables_assert() + query("drop database a") + query("drop database lazy") + query("drop table src") + query("drop table join") + query("drop table system.join") From 1e2844f999e564abcb32dfd04a3a8d21b6ea0432 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Thu, 2 Sep 2021 16:48:41 +0300 Subject: [PATCH 19/80] support function IN --- src/Databases/DDLDependencyVisitor.cpp | 6 ++++++ .../queries/0_stateless/01160_table_dependencies.reference | 1 + tests/queries/0_stateless/01160_table_dependencies.sh | 4 +++- 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/src/Databases/DDLDependencyVisitor.cpp b/src/Databases/DDLDependencyVisitor.cpp index a5f2ce995a7..2b70421641b 100644 --- a/src/Databases/DDLDependencyVisitor.cpp +++ b/src/Databases/DDLDependencyVisitor.cpp @@ -6,6 +6,7 @@ #include #include #include +#include namespace DB { @@ -32,6 +33,11 @@ void DDLDependencyVisitor::visit(const ASTFunction & function, Data & data) { extractTableNameFromArgument(function, data, 0); } + else if (Poco::toLower(function.name) == "in") + { + extractTableNameFromArgument(function, data, 1); + } + } void DDLDependencyVisitor::visit(const ASTFunctionWithKeyValueArguments & dict_source, Data & data) diff --git a/tests/queries/0_stateless/01160_table_dependencies.reference b/tests/queries/0_stateless/01160_table_dependencies.reference index 6691df07cb9..39a58b06076 100644 --- a/tests/queries/0_stateless/01160_table_dependencies.reference +++ b/tests/queries/0_stateless/01160_table_dependencies.reference @@ -2,4 +2,5 @@ dict1 dict2 dict_src join +s 
t diff --git a/tests/queries/0_stateless/01160_table_dependencies.sh b/tests/queries/0_stateless/01160_table_dependencies.sh index 0ea213ba5ff..149439f2981 100755 --- a/tests/queries/0_stateless/01160_table_dependencies.sh +++ b/tests/queries/0_stateless/01160_table_dependencies.sh @@ -25,8 +25,10 @@ PRIMARY KEY n SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'join' PASSWORD '' DB '$CLICKHOUSE_DATABASE')) LIFETIME(MIN 1 MAX 10) LAYOUT(FLAT());" +$CLICKHOUSE_CLIENT -q "create table s (x default joinGet($CLICKHOUSE_DATABASE.join, 'm', 42::int)) engine=Set" + $CLICKHOUSE_CLIENT -q "create table t (n int, m int default joinGet($CLICKHOUSE_DATABASE.join, 'm', 42::int), -s String default dictGet($CLICKHOUSE_DATABASE.dict1, 's', 42::UInt64)) engine=MergeTree order by n;" +s String default dictGet($CLICKHOUSE_DATABASE.dict1, 's', 42::UInt64), x default in(1, $CLICKHOUSE_DATABASE.s)) engine=MergeTree order by n;" CLICKHOUSE_CLIENT_DEFAULT_DB=$(echo ${CLICKHOUSE_CLIENT} | sed 's/'"--database=${CLICKHOUSE_DATABASE}"'/--database=default/g') From fbd2f40b051cdf030d83d9386b9afed19b366859 Mon Sep 17 00:00:00 2001 From: kssenii Date: Fri, 3 Sep 2021 09:04:46 +0000 Subject: [PATCH 20/80] Fix --- src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp | 1 + src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp | 6 ++++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp b/src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp index 90917a0fd7e..f2860235117 100644 --- a/src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp +++ b/src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp @@ -571,6 +571,7 @@ ASTs InterpreterAlterImpl::getRewrittenQueries( auto rewritten_rename_query = std::make_shared(); rewritten_alter_query->database = mapped_to_database; rewritten_alter_query->table = alter_query.table; + rewritten_alter_query->alter_object = ASTAlterQuery::AlterObjectType::TABLE; rewritten_alter_query->set(rewritten_alter_query->command_list, std::make_shared()); String default_after_column; diff --git a/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp b/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp index c47e003a681..7cc3340bbb7 100644 --- a/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp +++ b/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp @@ -456,7 +456,7 @@ void PostgreSQLReplicationHandler::setSetting(const SettingChange & setting) { consumer_task->deactivate(); consumer->setSetting(setting); - consumer_task->schedule(); + consumer_task->activateAndSchedule(); } @@ -658,6 +658,7 @@ void PostgreSQLReplicationHandler::addTableToReplication(StorageMaterializedPost } catch (...) { + consumer_task->activate(); consumer_task->scheduleAfter(RESCHEDULE_MS); auto error_message = getCurrentExceptionMessage(false); @@ -685,13 +686,14 @@ void PostgreSQLReplicationHandler::removeTableFromReplication(const String & pos } catch (...) { + consumer_task->activate(); consumer_task->scheduleAfter(RESCHEDULE_MS); auto error_message = getCurrentExceptionMessage(false); throw Exception(ErrorCodes::POSTGRESQL_REPLICATION_INTERNAL_ERROR, "Failed to remove table `{}` from replication. 
Info: {}", postgres_table_name, error_message); } - consumer_task->schedule(); + consumer_task->activateAndSchedule(); } From 76e49334fa8073a7208cce6cd1dcb5e1d968ee30 Mon Sep 17 00:00:00 2001 From: kssenii Date: Sun, 5 Sep 2021 01:59:44 +0300 Subject: [PATCH 21/80] Minor fixes --- src/Databases/DatabaseAtomic.cpp | 53 +--------- src/Databases/DatabaseAtomic.h | 3 - src/Databases/DatabaseFactory.cpp | 4 +- src/Databases/DatabaseLazy.cpp | 4 +- src/Databases/DatabaseLazy.h | 2 +- src/Databases/DatabaseOnDisk.cpp | 53 +++++++++- src/Databases/DatabaseOnDisk.h | 6 +- src/Databases/DatabaseOrdinary.cpp | 8 +- src/Databases/DatabaseOrdinary.h | 5 +- .../MySQL/DatabaseMaterializedMySQL.cpp | 4 +- .../DatabaseMaterializedPostgreSQL.cpp | 97 +++++++++++-------- src/Parsers/ParserAlterQuery.cpp | 14 +++ .../test.py | 4 + 13 files changed, 149 insertions(+), 108 deletions(-) diff --git a/src/Databases/DatabaseAtomic.cpp b/src/Databases/DatabaseAtomic.cpp index e73a8114c00..1162eee5fef 100644 --- a/src/Databases/DatabaseAtomic.cpp +++ b/src/Databases/DatabaseAtomic.cpp @@ -38,11 +38,10 @@ public: }; DatabaseAtomic::DatabaseAtomic(String name_, String metadata_path_, UUID uuid, const String & logger_name, ContextPtr context_, ASTPtr storage_def_) - : DatabaseOrdinary(name_, std::move(metadata_path_), "store/", logger_name, context_) + : DatabaseOrdinary(name_, std::move(metadata_path_), "store/", logger_name, context_, storage_def_) , path_to_table_symlinks(fs::path(getContext()->getPath()) / "data" / escapeForFileName(name_) / "") , path_to_metadata_symlink(fs::path(getContext()->getPath()) / "metadata" / escapeForFileName(name_)) , db_uuid(uuid) - , storage_def(storage_def_) { assert(db_uuid != UUIDHelpers::Nil); fs::create_directories(path_to_table_symlinks); @@ -569,54 +568,4 @@ void DatabaseAtomic::checkDetachedTableNotInUse(const UUID & uuid) assertDetachedTableNotInUse(uuid); } -void DatabaseAtomic::modifySettingsMetadata(const SettingsChanges & settings_changes, ContextPtr local_context) -{ - auto create_query = getCreateDatabaseQuery()->clone(); - auto * create = create_query->as(); - auto * settings = create->storage->settings; - if (settings) - { - auto & previous_settings = settings->changes; - for (const auto & change : settings_changes) - { - auto it = std::find_if(previous_settings.begin(), previous_settings.end(), - [&](const auto & prev){ return prev.name == change.name; }); - if (it != previous_settings.end()) - it->value = change.value; - else - previous_settings.push_back(change); - } - } - else - { - auto settings = std::make_shared(); - settings->is_standalone = false; - settings->changes = settings_changes; - create->storage->set(create->storage->settings, settings->clone()); - } - - create->attach = true; - create->if_not_exists = false; - - WriteBufferFromOwnString statement_buf; - formatAST(*create, statement_buf, false); - writeChar('\n', statement_buf); - String statement = statement_buf.str(); - - String database_name_escaped = escapeForFileName(database_name); - fs::path metadata_root_path = fs::canonical(local_context->getGlobalContext()->getPath()); - fs::path metadata_file_tmp_path = fs::path(metadata_root_path) / "metadata" / (database_name_escaped + ".sql.tmp"); - fs::path metadata_file_path = fs::path(metadata_root_path) / "metadata" / (database_name_escaped + ".sql"); - - WriteBufferFromFile out(metadata_file_tmp_path, statement.size(), O_WRONLY | O_CREAT | O_EXCL); - writeString(statement, out); - - out.next(); - if (getContext()->getSettingsRef().fsync_metadata) - 
out.sync(); - out.close(); - - fs::rename(metadata_file_tmp_path, metadata_file_path); -} - } diff --git a/src/Databases/DatabaseAtomic.h b/src/Databases/DatabaseAtomic.h index 77d2ec6a6cb..934dcbb997e 100644 --- a/src/Databases/DatabaseAtomic.h +++ b/src/Databases/DatabaseAtomic.h @@ -61,8 +61,6 @@ public: void checkDetachedTableNotInUse(const UUID & uuid) override; void setDetachedTableNotInUseForce(const UUID & uuid); - void modifySettingsMetadata(const SettingsChanges & settings_changes, ContextPtr local_context) override; - protected: void commitAlterTable(const StorageID & table_id, const String & table_metadata_tmp_path, const String & table_metadata_path, const String & statement, ContextPtr query_context) override; void commitCreateTable(const ASTCreateQuery & query, const StoragePtr & table, @@ -82,7 +80,6 @@ protected: String path_to_table_symlinks; String path_to_metadata_symlink; const UUID db_uuid; - ASTPtr storage_def; }; } diff --git a/src/Databases/DatabaseFactory.cpp b/src/Databases/DatabaseFactory.cpp index 962177f6f49..9b732f29fe5 100644 --- a/src/Databases/DatabaseFactory.cpp +++ b/src/Databases/DatabaseFactory.cpp @@ -128,7 +128,7 @@ DatabasePtr DatabaseFactory::getImpl(const ASTCreateQuery & create, const String "Database engine `{}` cannot have parameters, primary_key, order_by, sample_by, settings", engine_name); if (engine_name == "Ordinary") - return std::make_shared(database_name, metadata_path, context); + return std::make_shared(database_name, metadata_path, context, engine_define->clone()); else if (engine_name == "Atomic") return std::make_shared(database_name, metadata_path, uuid, context, engine_define->clone()); else if (engine_name == "Memory") @@ -208,7 +208,7 @@ DatabasePtr DatabaseFactory::getImpl(const ASTCreateQuery & create, const String const auto & arguments = engine->arguments->children; const auto cache_expiration_time_seconds = safeGetLiteralValue(arguments[0], "Lazy"); - return std::make_shared(database_name, metadata_path, cache_expiration_time_seconds, context); + return std::make_shared(database_name, metadata_path, cache_expiration_time_seconds, context, engine_define->clone()); } else if (engine_name == "Replicated") diff --git a/src/Databases/DatabaseLazy.cpp b/src/Databases/DatabaseLazy.cpp index 7e0e1b7aa43..d77dcc06e36 100644 --- a/src/Databases/DatabaseLazy.cpp +++ b/src/Databases/DatabaseLazy.cpp @@ -28,8 +28,8 @@ namespace ErrorCodes } -DatabaseLazy::DatabaseLazy(const String & name_, const String & metadata_path_, time_t expiration_time_, ContextPtr context_) - : DatabaseOnDisk(name_, metadata_path_, "data/" + escapeForFileName(name_) + "/", "DatabaseLazy (" + name_ + ")", context_) +DatabaseLazy::DatabaseLazy(const String & name_, const String & metadata_path_, time_t expiration_time_, ContextPtr context_, ASTPtr storage_def_) + : DatabaseOnDisk(name_, metadata_path_, "data/" + escapeForFileName(name_) + "/", "DatabaseLazy (" + name_ + ")", context_, storage_def_) , expiration_time(expiration_time_) { } diff --git a/src/Databases/DatabaseLazy.h b/src/Databases/DatabaseLazy.h index bc79a49b2fe..49a9ae394f7 100644 --- a/src/Databases/DatabaseLazy.h +++ b/src/Databases/DatabaseLazy.h @@ -18,7 +18,7 @@ class Context; class DatabaseLazy final : public DatabaseOnDisk { public: - DatabaseLazy(const String & name_, const String & metadata_path_, time_t expiration_time_, ContextPtr context_); + DatabaseLazy(const String & name_, const String & metadata_path_, time_t expiration_time_, ContextPtr context_, ASTPtr storage_def_); String 
getEngineName() const override { return "Lazy"; } diff --git a/src/Databases/DatabaseOnDisk.cpp b/src/Databases/DatabaseOnDisk.cpp index 620e560b64c..0bb4c35ec6b 100644 --- a/src/Databases/DatabaseOnDisk.cpp +++ b/src/Databases/DatabaseOnDisk.cpp @@ -196,10 +196,12 @@ DatabaseOnDisk::DatabaseOnDisk( const String & metadata_path_, const String & data_path_, const String & logger, - ContextPtr local_context) + ContextPtr local_context, + ASTPtr storage_def_) : DatabaseWithOwnTablesBase(name, logger, local_context) , metadata_path(metadata_path_) , data_path(data_path_) + , storage_def(storage_def_) { fs::create_directories(local_context->getPath() + data_path); fs::create_directories(metadata_path); @@ -699,4 +701,53 @@ ASTPtr DatabaseOnDisk::getCreateQueryFromMetadata(const String & database_metada return ast; } +void DatabaseOnDisk::modifySettingsMetadata(const SettingsChanges & settings_changes, ContextPtr local_context) +{ + auto create_query = getCreateDatabaseQuery()->clone(); + auto * create = create_query->as(); + auto * settings = create->storage->settings; + if (settings) + { + auto & storage_settings = settings->changes; + for (const auto & change : settings_changes) + { + auto it = std::find_if(storage_settings.begin(), storage_settings.end(), + [&](const auto & prev){ return prev.name == change.name; }); + if (it != storage_settings.end()) + it->value = change.value; + else + storage_settings.push_back(change); + } + } + else + { + auto storage_settings = std::make_shared(); + storage_settings->is_standalone = false; + storage_settings->changes = settings_changes; + create->storage->set(create->storage->settings, storage_settings->clone()); + } + + create->attach = true; + create->if_not_exists = false; + + WriteBufferFromOwnString statement_buf; + formatAST(*create, statement_buf, false); + writeChar('\n', statement_buf); + String statement = statement_buf.str(); + + String database_name_escaped = escapeForFileName(database_name); + fs::path metadata_root_path = fs::canonical(local_context->getGlobalContext()->getPath()); + fs::path metadata_file_tmp_path = fs::path(metadata_root_path) / "metadata" / (database_name_escaped + ".sql.tmp"); + fs::path metadata_file_path = fs::path(metadata_root_path) / "metadata" / (database_name_escaped + ".sql"); + + WriteBufferFromFile out(metadata_file_tmp_path, statement.size(), O_WRONLY | O_CREAT | O_EXCL); + writeString(statement, out); + + out.next(); + if (getContext()->getSettingsRef().fsync_metadata) + out.sync(); + out.close(); + + fs::rename(metadata_file_tmp_path, metadata_file_path); +} } diff --git a/src/Databases/DatabaseOnDisk.h b/src/Databases/DatabaseOnDisk.h index e7dda7cb36b..ffad5f215c9 100644 --- a/src/Databases/DatabaseOnDisk.h +++ b/src/Databases/DatabaseOnDisk.h @@ -32,7 +32,7 @@ void applyMetadataChangesToCreateQuery(const ASTPtr & query, const StorageInMemo class DatabaseOnDisk : public DatabaseWithOwnTablesBase { public: - DatabaseOnDisk(const String & name, const String & metadata_path_, const String & data_path_, const String & logger, ContextPtr context); + DatabaseOnDisk(const String & name, const String & metadata_path_, const String & data_path_, const String & logger, ContextPtr context, ASTPtr storage_def_); void createTable( ContextPtr context, @@ -74,6 +74,8 @@ public: void checkMetadataFilenameAvailability(const String & to_table_name) const; void checkMetadataFilenameAvailabilityUnlocked(const String & to_table_name, std::unique_lock &) const; + void modifySettingsMetadata(const SettingsChanges & 
settings_changes, ContextPtr local_context) override; + protected: static constexpr const char * create_suffix = ".tmp"; static constexpr const char * drop_suffix = ".tmp_drop"; @@ -97,6 +99,8 @@ protected: const String metadata_path; const String data_path; + + ASTPtr storage_def; }; } diff --git a/src/Databases/DatabaseOrdinary.cpp b/src/Databases/DatabaseOrdinary.cpp index bfe5de4c95f..991dda44f0b 100644 --- a/src/Databases/DatabaseOrdinary.cpp +++ b/src/Databases/DatabaseOrdinary.cpp @@ -72,14 +72,14 @@ namespace } -DatabaseOrdinary::DatabaseOrdinary(const String & name_, const String & metadata_path_, ContextPtr context_) - : DatabaseOrdinary(name_, metadata_path_, "data/" + escapeForFileName(name_) + "/", "DatabaseOrdinary (" + name_ + ")", context_) +DatabaseOrdinary::DatabaseOrdinary(const String & name_, const String & metadata_path_, ContextPtr context_, ASTPtr storage_def_) + : DatabaseOrdinary(name_, metadata_path_, "data/" + escapeForFileName(name_) + "/", "DatabaseOrdinary (" + name_ + ")", context_, storage_def_) { } DatabaseOrdinary::DatabaseOrdinary( - const String & name_, const String & metadata_path_, const String & data_path_, const String & logger, ContextPtr context_) - : DatabaseOnDisk(name_, metadata_path_, data_path_, logger, context_) + const String & name_, const String & metadata_path_, const String & data_path_, const String & logger, ContextPtr context_, ASTPtr storage_def_) + : DatabaseOnDisk(name_, metadata_path_, data_path_, logger, context_, storage_def_) { } diff --git a/src/Databases/DatabaseOrdinary.h b/src/Databases/DatabaseOrdinary.h index 7832377ccae..e9cbb6f22e6 100644 --- a/src/Databases/DatabaseOrdinary.h +++ b/src/Databases/DatabaseOrdinary.h @@ -14,9 +14,10 @@ namespace DB class DatabaseOrdinary : public DatabaseOnDisk { public: - DatabaseOrdinary(const String & name_, const String & metadata_path_, ContextPtr context); + DatabaseOrdinary(const String & name_, const String & metadata_path_, ContextPtr context, ASTPtr storage_def_); DatabaseOrdinary( - const String & name_, const String & metadata_path_, const String & data_path_, const String & logger, ContextPtr context_); + const String & name_, const String & metadata_path_, const String & data_path_, + const String & logger, ContextPtr context_, ASTPtr storage_def_); String getEngineName() const override { return "Ordinary"; } diff --git a/src/Databases/MySQL/DatabaseMaterializedMySQL.cpp b/src/Databases/MySQL/DatabaseMaterializedMySQL.cpp index 1bdf4b962ec..9bc6ae679f6 100644 --- a/src/Databases/MySQL/DatabaseMaterializedMySQL.cpp +++ b/src/Databases/MySQL/DatabaseMaterializedMySQL.cpp @@ -38,13 +38,13 @@ DatabaseMaterializedMySQL::DatabaseMaterializedMySQL( mysqlxx::Pool && pool_, MySQLClient && client_, std::unique_ptr settings_, - ASTPtr) + ASTPtr storage_def_) : DatabaseOrdinary( database_name_, metadata_path_, "data/" + escapeForFileName(database_name_) + "/", "DatabaseMaterializedMySQL (" + database_name_ + ")", - context_) + context_, storage_def_) , settings(std::move(settings_)) , materialize_thread(context_, database_name_, mysql_database_name_, std::move(pool_), std::move(client_), settings.get()) { diff --git a/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.cpp b/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.cpp index 5c9cc95238b..4d5e4ff2d7b 100644 --- a/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.cpp +++ b/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.cpp @@ -194,31 +194,6 @@ StoragePtr DatabaseMaterializedPostgreSQL::tryGetTable(const 
String & name, Cont } -void DatabaseMaterializedPostgreSQL::createTable(ContextPtr local_context, const String & table_name, const StoragePtr & table, const ASTPtr & query) -{ - /// Create table query can only be called from replication thread. - if (local_context->isInternalQuery()) - { - DatabaseAtomic::createTable(local_context, table_name, table, query); - return; - } - - const auto & create = query->as(); - if (!create->attach) - throw Exception(ErrorCodes::QUERY_NOT_ALLOWED, "CREATE TABLE is not allowed for database engine {}. Use ATTACH TABLE instead", getEngineName()); - - /// Create ReplacingMergeTree table. - auto query_copy = query->clone(); - auto * create_query = assert_cast(query_copy.get()); - create_query->attach = false; - create_query->attach_short_syntax = false; - DatabaseAtomic::createTable(StorageMaterializedPostgreSQL::makeNestedTableContext(local_context), table_name, table, query_copy); - - /// Attach MaterializedPostgreSQL table. - attachTable(table_name, table, {}); -} - - String DatabaseMaterializedPostgreSQL::getTablesList(const String & except) const { String tables_list; @@ -272,26 +247,64 @@ ASTPtr DatabaseMaterializedPostgreSQL::createAlterSettingsQuery(const SettingCha } +void DatabaseMaterializedPostgreSQL::createTable(ContextPtr local_context, const String & table_name, const StoragePtr & table, const ASTPtr & query) +{ + /// Create table query can only be called from replication thread. + if (local_context->isInternalQuery()) + { + DatabaseAtomic::createTable(local_context, table_name, table, query); + return; + } + + const auto & create = query->as(); + if (!create->attach) + throw Exception(ErrorCodes::QUERY_NOT_ALLOWED, "CREATE TABLE is not allowed for database engine {}. Use ATTACH TABLE instead", getEngineName()); + + /// Create ReplacingMergeTree table. + auto query_copy = query->clone(); + auto * create_query = assert_cast(query_copy.get()); + create_query->attach = false; + create_query->attach_short_syntax = false; + DatabaseAtomic::createTable(StorageMaterializedPostgreSQL::makeNestedTableContext(local_context), table_name, table, query_copy); + + /// Attach MaterializedPostgreSQL table. + attachTable(table_name, table, {}); +} + + void DatabaseMaterializedPostgreSQL::attachTable(const String & table_name, const StoragePtr & table, const String & relative_table_path) { - /// TODO: If attach fails, need to delete nested... if (CurrentThread::isInitialized() && CurrentThread::get().getQueryContext()) { - auto tables_to_replicate = settings->materialized_postgresql_tables_list.value; - if (tables_to_replicate.empty()) - tables_to_replicate = getTablesList(); - - /// tables_to_replicate can be empty if postgres database had no tables when this database was created. - SettingChange new_setting("materialized_postgresql_tables_list", tables_to_replicate.empty() ? table_name : (tables_to_replicate + "," + table_name)); - auto alter_query = createAlterSettingsQuery(new_setting); - auto current_context = Context::createCopy(getContext()->getGlobalContext()); current_context->setInternalQuery(true); - InterpreterAlterQuery(alter_query, current_context).execute(); - auto storage = StorageMaterializedPostgreSQL::create(table, getContext(), remote_database_name, table_name); - materialized_tables[table_name] = storage; - replication_handler->addTableToReplication(dynamic_cast(storage.get()), table_name); + /// We just came from createTable() and created nested table there. Add assert. 
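+        /// createTable() has just created the nested table through DatabaseAtomic::createTable(), so the lookup below is expected to succeed.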
+ auto nested_table = DatabaseAtomic::tryGetTable(table_name, current_context); + assert(nested_table != nullptr); + + try + { + auto tables_to_replicate = settings->materialized_postgresql_tables_list.value; + if (tables_to_replicate.empty()) + tables_to_replicate = getTablesList(); + + /// tables_to_replicate can be empty if postgres database had no tables when this database was created. + SettingChange new_setting("materialized_postgresql_tables_list", tables_to_replicate.empty() ? table_name : (tables_to_replicate + "," + table_name)); + auto alter_query = createAlterSettingsQuery(new_setting); + + InterpreterAlterQuery(alter_query, current_context).execute(); + + auto storage = StorageMaterializedPostgreSQL::create(table, getContext(), remote_database_name, table_name); + materialized_tables[table_name] = storage; + replication_handler->addTableToReplication(dynamic_cast(storage.get()), table_name); + } + catch (...) + { + /// This is a failed attach table. Remove already created nested table. + DatabaseAtomic::dropTable(current_context, table_name, true); + throw; + } } else { @@ -334,6 +347,14 @@ StoragePtr DatabaseMaterializedPostgreSQL::detachTable(const String & table_name } catch (Exception & e) { + /// We already removed this table from replication and adding it back will be an overkill.. + /// TODO: this is bad, we leave a table lying somewhere not dropped, and if user will want + /// to move it back into replication, he will fail to do so because there is undropped nested with the same name. + /// This can also happen if we crash after removing table from replication andd before dropping nested. + /// As a solution, we could drop a table if it already exists and add a fresh one instead for these two cases. + /// TODO: sounds good. + materialized_tables.erase(table_name); + e.addMessage("while removing table `" + table_name + "` from replication"); throw; } diff --git a/src/Parsers/ParserAlterQuery.cpp b/src/Parsers/ParserAlterQuery.cpp index bd998e962d1..e89302fd212 100644 --- a/src/Parsers/ParserAlterQuery.cpp +++ b/src/Parsers/ParserAlterQuery.cpp @@ -187,6 +187,20 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected command->type = ASTAlterCommand::RENAME_COLUMN; } + else if (s_materialize_column.ignore(pos, expected)) + { + if (!parser_name.parse(pos, command->column, expected)) + return false; + + command->type = ASTAlterCommand::MATERIALIZE_COLUMN; + command->detach = false; + + if (s_in_partition.ignore(pos, expected)) + { + if (!parser_partition.parse(pos, command->partition, expected)) + return false; + } + } else if (s_drop_partition.ignore(pos, expected)) { if (!parser_partition.parse(pos, command->partition, expected)) diff --git a/tests/integration/test_postgresql_replica_database_engine/test.py b/tests/integration/test_postgresql_replica_database_engine/test.py index 4c12966bef7..0c51bb3aafb 100644 --- a/tests/integration/test_postgresql_replica_database_engine/test.py +++ b/tests/integration/test_postgresql_replica_database_engine/test.py @@ -928,6 +928,10 @@ def test_abrupt_server_restart_while_heavy_replication(started_cluster): def test_quoting(started_cluster): table_name = 'user' + conn = get_postgres_conn(ip=started_cluster.postgres_ip, + port=started_cluster.postgres_port, + database=True) + cursor = conn.cursor() create_postgres_table(cursor, table_name); instance.query("INSERT INTO postgres_database.{} SELECT number, number from numbers(50)".format(table_name)) create_materialized_db(ip=started_cluster.postgres_ip, 
port=started_cluster.postgres_port) From bed2688dad02b83d77240b930b6c85775c24691a Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 9 Sep 2021 01:25:08 +0300 Subject: [PATCH 22/80] Review fixes --- src/Databases/DatabaseAtomic.cpp | 8 ++-- src/Databases/DatabaseAtomic.h | 4 +- src/Databases/DatabaseFactory.cpp | 18 ++++---- src/Databases/DatabaseLazy.cpp | 4 +- src/Databases/DatabaseLazy.h | 2 +- src/Databases/DatabaseOnDisk.cpp | 10 ++--- src/Databases/DatabaseOnDisk.h | 7 ++-- src/Databases/DatabaseOrdinary.cpp | 8 ++-- src/Databases/DatabaseOrdinary.h | 4 +- src/Databases/DatabaseReplicated.cpp | 5 +-- src/Databases/DatabaseReplicated.h | 2 +- src/Databases/IDatabase.h | 16 ++------ .../MySQL/DatabaseMaterializedMySQL.cpp | 10 ++--- .../MySQL/DatabaseMaterializedMySQL.h | 2 +- .../DatabaseMaterializedPostgreSQL.cpp | 41 +++++++++++-------- .../DatabaseMaterializedPostgreSQL.h | 8 ++-- src/Interpreters/InterpreterAlterQuery.cpp | 19 ++++++++- src/Storages/AlterCommands.cpp | 18 -------- src/Storages/AlterCommands.h | 2 +- .../MaterializedPostgreSQLConsumer.cpp | 2 +- .../PostgreSQLReplicationHandler.cpp | 6 +-- .../StorageMaterializedPostgreSQL.cpp | 3 +- .../test.py | 4 ++ 23 files changed, 96 insertions(+), 107 deletions(-) diff --git a/src/Databases/DatabaseAtomic.cpp b/src/Databases/DatabaseAtomic.cpp index 1162eee5fef..7b1a8c6446e 100644 --- a/src/Databases/DatabaseAtomic.cpp +++ b/src/Databases/DatabaseAtomic.cpp @@ -37,8 +37,8 @@ public: UUID uuid() const override { return table()->getStorageID().uuid; } }; -DatabaseAtomic::DatabaseAtomic(String name_, String metadata_path_, UUID uuid, const String & logger_name, ContextPtr context_, ASTPtr storage_def_) - : DatabaseOrdinary(name_, std::move(metadata_path_), "store/", logger_name, context_, storage_def_) +DatabaseAtomic::DatabaseAtomic(String name_, String metadata_path_, UUID uuid, const String & logger_name, ContextPtr context_) + : DatabaseOrdinary(name_, std::move(metadata_path_), "store/", logger_name, context_) , path_to_table_symlinks(fs::path(getContext()->getPath()) / "data" / escapeForFileName(name_) / "") , path_to_metadata_symlink(fs::path(getContext()->getPath()) / "metadata" / escapeForFileName(name_)) , db_uuid(uuid) @@ -48,8 +48,8 @@ DatabaseAtomic::DatabaseAtomic(String name_, String metadata_path_, UUID uuid, c tryCreateMetadataSymlink(); } -DatabaseAtomic::DatabaseAtomic(String name_, String metadata_path_, UUID uuid, ContextPtr context_, ASTPtr storage_def_) - : DatabaseAtomic(name_, std::move(metadata_path_), uuid, "DatabaseAtomic (" + name_ + ")", context_, storage_def_) +DatabaseAtomic::DatabaseAtomic(String name_, String metadata_path_, UUID uuid, ContextPtr context_) + : DatabaseAtomic(name_, std::move(metadata_path_), uuid, "DatabaseAtomic (" + name_ + ")", context_) { } diff --git a/src/Databases/DatabaseAtomic.h b/src/Databases/DatabaseAtomic.h index 934dcbb997e..8be009cd6ca 100644 --- a/src/Databases/DatabaseAtomic.h +++ b/src/Databases/DatabaseAtomic.h @@ -19,8 +19,8 @@ namespace DB class DatabaseAtomic : public DatabaseOrdinary { public: - DatabaseAtomic(String name_, String metadata_path_, UUID uuid, const String & logger_name, ContextPtr context_, ASTPtr storage_def_); - DatabaseAtomic(String name_, String metadata_path_, UUID uuid, ContextPtr context_, ASTPtr storage_def_); + DatabaseAtomic(String name_, String metadata_path_, UUID uuid, const String & logger_name, ContextPtr context_); + DatabaseAtomic(String name_, String metadata_path_, UUID uuid, ContextPtr context_); String getEngineName() 
const override { return "Atomic"; } UUID getUUID() const override { return db_uuid; } diff --git a/src/Databases/DatabaseFactory.cpp b/src/Databases/DatabaseFactory.cpp index 9b732f29fe5..047d4a55802 100644 --- a/src/Databases/DatabaseFactory.cpp +++ b/src/Databases/DatabaseFactory.cpp @@ -128,9 +128,9 @@ DatabasePtr DatabaseFactory::getImpl(const ASTCreateQuery & create, const String "Database engine `{}` cannot have parameters, primary_key, order_by, sample_by, settings", engine_name); if (engine_name == "Ordinary") - return std::make_shared(database_name, metadata_path, context, engine_define->clone()); + return std::make_shared(database_name, metadata_path, context); else if (engine_name == "Atomic") - return std::make_shared(database_name, metadata_path, uuid, context, engine_define->clone()); + return std::make_shared(database_name, metadata_path, uuid, context); else if (engine_name == "Memory") return std::make_shared(database_name, context); else if (engine_name == "Dictionary") @@ -183,12 +183,12 @@ DatabasePtr DatabaseFactory::getImpl(const ASTCreateQuery & create, const String if (create.uuid == UUIDHelpers::Nil) return std::make_shared>( - context, database_name, metadata_path, uuid, mysql_database_name, std::move(mysql_pool), std::move(client) - , std::move(materialize_mode_settings), engine_define->clone()); + context, database_name, metadata_path, uuid, mysql_database_name, + std::move(mysql_pool), std::move(client), std::move(materialize_mode_settings)); else return std::make_shared>( - context, database_name, metadata_path, uuid, mysql_database_name, std::move(mysql_pool), std::move(client) - , std::move(materialize_mode_settings), engine_define->clone()); + context, database_name, metadata_path, uuid, mysql_database_name, + std::move(mysql_pool), std::move(client), std::move(materialize_mode_settings)); } catch (...) 
{ @@ -208,7 +208,7 @@ DatabasePtr DatabaseFactory::getImpl(const ASTCreateQuery & create, const String const auto & arguments = engine->arguments->children; const auto cache_expiration_time_seconds = safeGetLiteralValue(arguments[0], "Lazy"); - return std::make_shared(database_name, metadata_path, cache_expiration_time_seconds, context, engine_define->clone()); + return std::make_shared(database_name, metadata_path, cache_expiration_time_seconds, context); } else if (engine_name == "Replicated") @@ -234,7 +234,7 @@ DatabasePtr DatabaseFactory::getImpl(const ASTCreateQuery & create, const String return std::make_shared(database_name, metadata_path, uuid, zookeeper_path, shard_name, replica_name, - std::move(database_replicated_settings), context, engine_define->clone()); + std::move(database_replicated_settings), context); } #if USE_LIBPQXX @@ -311,7 +311,7 @@ DatabasePtr DatabaseFactory::getImpl(const ASTCreateQuery & create, const String postgresql_replica_settings->loadFromQuery(*engine_define); return std::make_shared( - context, metadata_path, uuid, engine_define->clone(), create.attach, + context, metadata_path, uuid, create.attach, database_name, postgres_database_name, connection_info, std::move(postgresql_replica_settings)); } diff --git a/src/Databases/DatabaseLazy.cpp b/src/Databases/DatabaseLazy.cpp index d77dcc06e36..7e0e1b7aa43 100644 --- a/src/Databases/DatabaseLazy.cpp +++ b/src/Databases/DatabaseLazy.cpp @@ -28,8 +28,8 @@ namespace ErrorCodes } -DatabaseLazy::DatabaseLazy(const String & name_, const String & metadata_path_, time_t expiration_time_, ContextPtr context_, ASTPtr storage_def_) - : DatabaseOnDisk(name_, metadata_path_, "data/" + escapeForFileName(name_) + "/", "DatabaseLazy (" + name_ + ")", context_, storage_def_) +DatabaseLazy::DatabaseLazy(const String & name_, const String & metadata_path_, time_t expiration_time_, ContextPtr context_) + : DatabaseOnDisk(name_, metadata_path_, "data/" + escapeForFileName(name_) + "/", "DatabaseLazy (" + name_ + ")", context_) , expiration_time(expiration_time_) { } diff --git a/src/Databases/DatabaseLazy.h b/src/Databases/DatabaseLazy.h index 49a9ae394f7..bc79a49b2fe 100644 --- a/src/Databases/DatabaseLazy.h +++ b/src/Databases/DatabaseLazy.h @@ -18,7 +18,7 @@ class Context; class DatabaseLazy final : public DatabaseOnDisk { public: - DatabaseLazy(const String & name_, const String & metadata_path_, time_t expiration_time_, ContextPtr context_, ASTPtr storage_def_); + DatabaseLazy(const String & name_, const String & metadata_path_, time_t expiration_time_, ContextPtr context_); String getEngineName() const override { return "Lazy"; } diff --git a/src/Databases/DatabaseOnDisk.cpp b/src/Databases/DatabaseOnDisk.cpp index 0bb4c35ec6b..f5b930a83c7 100644 --- a/src/Databases/DatabaseOnDisk.cpp +++ b/src/Databases/DatabaseOnDisk.cpp @@ -196,12 +196,10 @@ DatabaseOnDisk::DatabaseOnDisk( const String & metadata_path_, const String & data_path_, const String & logger, - ContextPtr local_context, - ASTPtr storage_def_) + ContextPtr local_context) : DatabaseWithOwnTablesBase(name, logger, local_context) , metadata_path(metadata_path_) , data_path(data_path_) - , storage_def(storage_def_) { fs::create_directories(local_context->getPath() + data_path); fs::create_directories(metadata_path); @@ -701,8 +699,10 @@ ASTPtr DatabaseOnDisk::getCreateQueryFromMetadata(const String & database_metada return ast; } -void DatabaseOnDisk::modifySettingsMetadata(const SettingsChanges & settings_changes, ContextPtr local_context) +void 
DatabaseOnDisk::modifySettingsMetadata(const SettingsChanges & settings_changes, ContextPtr query_context) { + std::lock_guard lock(modify_settings_mutex); + auto create_query = getCreateDatabaseQuery()->clone(); auto * create = create_query->as(); auto * settings = create->storage->settings; @@ -736,7 +736,7 @@ void DatabaseOnDisk::modifySettingsMetadata(const SettingsChanges & settings_cha String statement = statement_buf.str(); String database_name_escaped = escapeForFileName(database_name); - fs::path metadata_root_path = fs::canonical(local_context->getGlobalContext()->getPath()); + fs::path metadata_root_path = fs::canonical(query_context->getGlobalContext()->getPath()); fs::path metadata_file_tmp_path = fs::path(metadata_root_path) / "metadata" / (database_name_escaped + ".sql.tmp"); fs::path metadata_file_path = fs::path(metadata_root_path) / "metadata" / (database_name_escaped + ".sql"); diff --git a/src/Databases/DatabaseOnDisk.h b/src/Databases/DatabaseOnDisk.h index ffad5f215c9..e375704be33 100644 --- a/src/Databases/DatabaseOnDisk.h +++ b/src/Databases/DatabaseOnDisk.h @@ -32,7 +32,7 @@ void applyMetadataChangesToCreateQuery(const ASTPtr & query, const StorageInMemo class DatabaseOnDisk : public DatabaseWithOwnTablesBase { public: - DatabaseOnDisk(const String & name, const String & metadata_path_, const String & data_path_, const String & logger, ContextPtr context, ASTPtr storage_def_); + DatabaseOnDisk(const String & name, const String & metadata_path_, const String & data_path_, const String & logger, ContextPtr context); void createTable( ContextPtr context, @@ -74,7 +74,7 @@ public: void checkMetadataFilenameAvailability(const String & to_table_name) const; void checkMetadataFilenameAvailabilityUnlocked(const String & to_table_name, std::unique_lock &) const; - void modifySettingsMetadata(const SettingsChanges & settings_changes, ContextPtr local_context) override; + void modifySettingsMetadata(const SettingsChanges & settings_changes, ContextPtr query_context); protected: static constexpr const char * create_suffix = ".tmp"; @@ -100,7 +100,8 @@ protected: const String metadata_path; const String data_path; - ASTPtr storage_def; + /// For alter settings. 
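+    /// Guards modifySettingsMetadata(): concurrent ALTER DATABASE ... MODIFY SETTING queries must not rewrite the metadata file simultaneously.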
+ std::mutex modify_settings_mutex; }; } diff --git a/src/Databases/DatabaseOrdinary.cpp b/src/Databases/DatabaseOrdinary.cpp index 991dda44f0b..bfe5de4c95f 100644 --- a/src/Databases/DatabaseOrdinary.cpp +++ b/src/Databases/DatabaseOrdinary.cpp @@ -72,14 +72,14 @@ namespace } -DatabaseOrdinary::DatabaseOrdinary(const String & name_, const String & metadata_path_, ContextPtr context_, ASTPtr storage_def_) - : DatabaseOrdinary(name_, metadata_path_, "data/" + escapeForFileName(name_) + "/", "DatabaseOrdinary (" + name_ + ")", context_, storage_def_) +DatabaseOrdinary::DatabaseOrdinary(const String & name_, const String & metadata_path_, ContextPtr context_) + : DatabaseOrdinary(name_, metadata_path_, "data/" + escapeForFileName(name_) + "/", "DatabaseOrdinary (" + name_ + ")", context_) { } DatabaseOrdinary::DatabaseOrdinary( - const String & name_, const String & metadata_path_, const String & data_path_, const String & logger, ContextPtr context_, ASTPtr storage_def_) - : DatabaseOnDisk(name_, metadata_path_, data_path_, logger, context_, storage_def_) + const String & name_, const String & metadata_path_, const String & data_path_, const String & logger, ContextPtr context_) + : DatabaseOnDisk(name_, metadata_path_, data_path_, logger, context_) { } diff --git a/src/Databases/DatabaseOrdinary.h b/src/Databases/DatabaseOrdinary.h index e9cbb6f22e6..5540632d60c 100644 --- a/src/Databases/DatabaseOrdinary.h +++ b/src/Databases/DatabaseOrdinary.h @@ -14,10 +14,10 @@ namespace DB class DatabaseOrdinary : public DatabaseOnDisk { public: - DatabaseOrdinary(const String & name_, const String & metadata_path_, ContextPtr context, ASTPtr storage_def_); + DatabaseOrdinary(const String & name_, const String & metadata_path_, ContextPtr context); DatabaseOrdinary( const String & name_, const String & metadata_path_, const String & data_path_, - const String & logger, ContextPtr context_, ASTPtr storage_def_); + const String & logger, ContextPtr context_); String getEngineName() const override { return "Ordinary"; } diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp index 28481267a33..da03eb6aba6 100644 --- a/src/Databases/DatabaseReplicated.cpp +++ b/src/Databases/DatabaseReplicated.cpp @@ -67,9 +67,8 @@ DatabaseReplicated::DatabaseReplicated( const String & shard_name_, const String & replica_name_, DatabaseReplicatedSettings db_settings_, - ContextPtr context_, - ASTPtr storage_def_) - : DatabaseAtomic(name_, metadata_path_, uuid, "DatabaseReplicated (" + name_ + ")", context_, storage_def_) + ContextPtr context_) + : DatabaseAtomic(name_, metadata_path_, uuid, "DatabaseReplicated (" + name_ + ")", context_) , zookeeper_path(zookeeper_path_) , shard_name(shard_name_) , replica_name(replica_name_) diff --git a/src/Databases/DatabaseReplicated.h b/src/Databases/DatabaseReplicated.h index b75a29f6333..1e0daeed07e 100644 --- a/src/Databases/DatabaseReplicated.h +++ b/src/Databases/DatabaseReplicated.h @@ -24,7 +24,7 @@ public: DatabaseReplicated(const String & name_, const String & metadata_path_, UUID uuid, const String & zookeeper_path_, const String & shard_name_, const String & replica_name_, DatabaseReplicatedSettings db_settings_, - ContextPtr context, ASTPtr storage_def_); + ContextPtr context); ~DatabaseReplicated() override; diff --git a/src/Databases/IDatabase.h b/src/Databases/IDatabase.h index 3e181b8e3a2..92041b366a7 100644 --- a/src/Databases/IDatabase.h +++ b/src/Databases/IDatabase.h @@ -282,19 +282,11 @@ public: /// Delete data and metadata stored 
inside the database, if exists. virtual void drop(ContextPtr /*context*/) {} - virtual void checkAlterIsPossible(const AlterCommands & /* commands */, ContextPtr /* context */) const + virtual void applyNewSettings(const SettingsChanges &, ContextPtr) { - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Alter is not supported by database engine {}", getEngineName()); - } - - virtual void modifySettingsMetadata(const SettingsChanges &, ContextPtr) - { - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Database engine {} does not support settings", getEngineName()); - } - - virtual void tryApplySettings(const SettingsChanges &, ContextPtr) - { - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Database engine {} does not support settings", getEngineName()); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, + "Database engine {} either does not support settings, or does not support altering settings", + getEngineName()); } virtual ~IDatabase() = default; diff --git a/src/Databases/MySQL/DatabaseMaterializedMySQL.cpp b/src/Databases/MySQL/DatabaseMaterializedMySQL.cpp index 9bc6ae679f6..0d81a4e1a98 100644 --- a/src/Databases/MySQL/DatabaseMaterializedMySQL.cpp +++ b/src/Databases/MySQL/DatabaseMaterializedMySQL.cpp @@ -37,14 +37,13 @@ DatabaseMaterializedMySQL::DatabaseMaterializedMySQL( const String & mysql_database_name_, mysqlxx::Pool && pool_, MySQLClient && client_, - std::unique_ptr settings_, - ASTPtr storage_def_) + std::unique_ptr settings_) : DatabaseOrdinary( database_name_, metadata_path_, "data/" + escapeForFileName(database_name_) + "/", "DatabaseMaterializedMySQL (" + database_name_ + ")", - context_, storage_def_) + context_) , settings(std::move(settings_)) , materialize_thread(context_, database_name_, mysql_database_name_, std::move(pool_), std::move(client_), settings.get()) { @@ -59,9 +58,8 @@ DatabaseMaterializedMySQL::DatabaseMaterializedMySQL( const String & mysql_database_name_, mysqlxx::Pool && pool_, MySQLClient && client_, - std::unique_ptr settings_, - ASTPtr storage_def_) - : DatabaseAtomic(database_name_, metadata_path_, uuid, "DatabaseMaterializedMySQL (" + database_name_ + ")", context_, storage_def_) + std::unique_ptr settings_) + : DatabaseAtomic(database_name_, metadata_path_, uuid, "DatabaseMaterializedMySQL (" + database_name_ + ")", context_) , settings(std::move(settings_)) , materialize_thread(context_, database_name_, mysql_database_name_, std::move(pool_), std::move(client_), settings.get()) { diff --git a/src/Databases/MySQL/DatabaseMaterializedMySQL.h b/src/Databases/MySQL/DatabaseMaterializedMySQL.h index 0339df5a156..292edc97878 100644 --- a/src/Databases/MySQL/DatabaseMaterializedMySQL.h +++ b/src/Databases/MySQL/DatabaseMaterializedMySQL.h @@ -25,7 +25,7 @@ public: DatabaseMaterializedMySQL( ContextPtr context, const String & database_name_, const String & metadata_path_, UUID uuid, const String & mysql_database_name_, mysqlxx::Pool && pool_, - MySQLClient && client_, std::unique_ptr settings_, ASTPtr storage_def_); + MySQLClient && client_, std::unique_ptr settings_); void rethrowExceptionIfNeed() const; diff --git a/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.cpp b/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.cpp index e00f067e1bc..0e441efc428 100644 --- a/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.cpp +++ b/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.cpp @@ -41,13 +41,12 @@ DatabaseMaterializedPostgreSQL::DatabaseMaterializedPostgreSQL( ContextPtr context_, const String & metadata_path_, UUID uuid_, - 
ASTPtr storage_def_, bool is_attach_, const String & database_name_, const String & postgres_database_name, const postgres::ConnectionInfo & connection_info_, std::unique_ptr settings_) - : DatabaseAtomic(database_name_, metadata_path_, uuid_, "DatabaseMaterializedPostgreSQL (" + database_name_ + ")", context_, storage_def_) + : DatabaseAtomic(database_name_, metadata_path_, uuid_, "DatabaseMaterializedPostgreSQL (" + database_name_ + ")", context_) , is_attach(is_attach_) , remote_database_name(postgres_database_name) , connection_info(connection_info_) @@ -129,18 +128,9 @@ void DatabaseMaterializedPostgreSQL::loadStoredObjects( } -void DatabaseMaterializedPostgreSQL::checkAlterIsPossible(const AlterCommands & commands, ContextPtr) const -{ - for (const auto & command : commands) - { - if (command.type != AlterCommand::MODIFY_DATABASE_SETTING) - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Alter of type '{}' is not supported by database engine {}", alterTypeToString(command.type), getEngineName()); - } -} - - -void DatabaseMaterializedPostgreSQL::tryApplySettings(const SettingsChanges & settings_changes, ContextPtr local_context) +void DatabaseMaterializedPostgreSQL::applyNewSettings(const SettingsChanges & settings_changes, ContextPtr query_context) { + std::lock_guard lock(handler_mutex); for (const auto & change : settings_changes) { if (!settings->has(change.name)) @@ -148,11 +138,14 @@ void DatabaseMaterializedPostgreSQL::tryApplySettings(const SettingsChanges & se if ((change.name == "materialized_postgresql_tables_list")) { - if (!local_context->isInternalQuery()) + if (!query_context->isInternalQuery()) throw Exception(ErrorCodes::QUERY_NOT_ALLOWED, "Changing setting `{}` is not allowed", change.name); + + DatabaseOnDisk::modifySettingsMetadata(settings_changes, query_context); } else if ((change.name == "materialized_postgresql_allow_automatic_update") || (change.name == "materialized_postgresql_max_block_size")) { + DatabaseOnDisk::modifySettingsMetadata(settings_changes, query_context); replication_handler->setSetting(change); } else @@ -192,7 +185,8 @@ StoragePtr DatabaseMaterializedPostgreSQL::tryGetTable(const String & name, Cont } -String DatabaseMaterializedPostgreSQL::getTablesList(const String & except) const +/// `except` is not empty in case it is detach and it will contain only one table name - name of detached table. +String DatabaseMaterializedPostgreSQL::getFormattedTablesList(const String & except) const { String tables_list; for (const auto & table : materialized_tables) @@ -214,6 +208,8 @@ ASTPtr DatabaseMaterializedPostgreSQL::getCreateTableQueryImpl(const String & ta if (!local_context->hasQueryContext()) return DatabaseAtomic::getCreateTableQueryImpl(table_name, local_context, throw_on_error); + std::lock_guard lock(handler_mutex); + auto storage = StorageMaterializedPostgreSQL::create(StorageID(database_name, table_name), getContext(), remote_database_name, table_name); auto ast_storage = replication_handler->getCreateNestedTableQuery(storage.get(), table_name); assert_cast(ast_storage.get())->uuid = UUIDHelpers::generateV4(); @@ -272,6 +268,8 @@ void DatabaseMaterializedPostgreSQL::createTable(ContextPtr local_context, const void DatabaseMaterializedPostgreSQL::attachTable(const String & table_name, const StoragePtr & table, const String & relative_table_path) { + /// If there is query context then we need to attach materialized storage. + /// If there is no query context then we need to attach internal storage from atomic database. 
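+    /// (A query context is presumably present only for user-issued ATTACH TABLE queries, not for internal loading on startup.)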
if (CurrentThread::isInitialized() && CurrentThread::get().getQueryContext()) { auto current_context = Context::createCopy(getContext()->getGlobalContext()); @@ -285,7 +283,7 @@ void DatabaseMaterializedPostgreSQL::attachTable(const String & table_name, cons { auto tables_to_replicate = settings->materialized_postgresql_tables_list.value; if (tables_to_replicate.empty()) - tables_to_replicate = getTablesList(); + tables_to_replicate = getFormattedTablesList(); /// tables_to_replicate can be empty if postgres database had no tables when this database was created. SettingChange new_setting("materialized_postgresql_tables_list", tables_to_replicate.empty() ? table_name : (tables_to_replicate + "," + table_name)); @@ -295,6 +293,8 @@ void DatabaseMaterializedPostgreSQL::attachTable(const String & table_name, cons auto storage = StorageMaterializedPostgreSQL::create(table, getContext(), remote_database_name, table_name); materialized_tables[table_name] = storage; + + std::lock_guard lock(handler_mutex); replication_handler->addTableToReplication(dynamic_cast(storage.get()), table_name); } catch (...) @@ -313,13 +313,15 @@ void DatabaseMaterializedPostgreSQL::attachTable(const String & table_name, cons StoragePtr DatabaseMaterializedPostgreSQL::detachTable(const String & table_name) { + /// If there is query context then we need to dettach materialized storage. + /// If there is no query context then we need to dettach internal storage from atomic database. if (CurrentThread::isInitialized() && CurrentThread::get().getQueryContext()) { auto & table_to_delete = materialized_tables[table_name]; if (!table_to_delete) throw Exception(ErrorCodes::UNKNOWN_TABLE, "Materialized table `{}` does not exist", table_name); - auto tables_to_replicate = getTablesList(table_name); + auto tables_to_replicate = getFormattedTablesList(table_name); /// tables_to_replicate can be empty if postgres database had no tables when this database was created. SettingChange new_setting("materialized_postgresql_tables_list", tables_to_replicate); @@ -335,6 +337,7 @@ StoragePtr DatabaseMaterializedPostgreSQL::detachTable(const String & table_name if (!nested) throw Exception(ErrorCodes::UNKNOWN_TABLE, "Inner table `{}` does not exist", table_name); + std::lock_guard lock(handler_mutex); replication_handler->removeTableFromReplication(table_name); try @@ -348,7 +351,7 @@ StoragePtr DatabaseMaterializedPostgreSQL::detachTable(const String & table_name /// We already removed this table from replication and adding it back will be an overkill.. /// TODO: this is bad, we leave a table lying somewhere not dropped, and if user will want /// to move it back into replication, he will fail to do so because there is undropped nested with the same name. - /// This can also happen if we crash after removing table from replication andd before dropping nested. + /// This can also happen if we crash after removing table from replication and before dropping nested. /// As a solution, we could drop a table if it already exists and add a fresh one instead for these two cases. /// TODO: sounds good. 
materialized_tables.erase(table_name); @@ -376,6 +379,7 @@ void DatabaseMaterializedPostgreSQL::shutdown() void DatabaseMaterializedPostgreSQL::stopReplication() { + std::lock_guard lock(handler_mutex); if (replication_handler) replication_handler->shutdown(); @@ -393,6 +397,7 @@ void DatabaseMaterializedPostgreSQL::dropTable(ContextPtr local_context, const S void DatabaseMaterializedPostgreSQL::drop(ContextPtr local_context) { + std::lock_guard lock(handler_mutex); if (replication_handler) replication_handler->shutdownFinal(); diff --git a/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.h b/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.h index 2d27677f524..effd0ec653a 100644 --- a/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.h +++ b/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.h @@ -32,7 +32,6 @@ public: ContextPtr context_, const String & metadata_path_, UUID uuid_, - ASTPtr storage_def_, bool is_attach_, const String & database_name_, const String & postgres_database_name, @@ -62,9 +61,7 @@ public: void stopReplication(); - void checkAlterIsPossible(const AlterCommands & commands, ContextPtr context) const override; - - void tryApplySettings(const SettingsChanges & settings_changes, ContextPtr context) override; + void applyNewSettings(const SettingsChanges & settings_changes, ContextPtr query_context) override; void shutdown() override; @@ -78,7 +75,7 @@ private: ASTPtr createAlterSettingsQuery(const SettingChange & new_setting); - String getTablesList(const String & except = {}) const; + String getFormattedTablesList(const String & except = {}) const; bool is_attach; String remote_database_name; @@ -88,6 +85,7 @@ private: std::shared_ptr replication_handler; std::map materialized_tables; mutable std::mutex tables_mutex; + mutable std::mutex handler_mutex; }; } diff --git a/src/Interpreters/InterpreterAlterQuery.cpp b/src/Interpreters/InterpreterAlterQuery.cpp index 5d93875ad77..8a23acdefc4 100644 --- a/src/Interpreters/InterpreterAlterQuery.cpp +++ b/src/Interpreters/InterpreterAlterQuery.cpp @@ -185,8 +185,23 @@ BlockIO InterpreterAlterQuery::executeToDatabase(const ASTAlterQuery & alter) if (!alter_commands.empty()) { - database->checkAlterIsPossible(alter_commands, getContext()); - alter_commands.apply(database, getContext()); + /// Only ALTER SETTING is supported. 
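+        /// Hypothetical example of a supported query: ALTER DATABASE db MODIFY SETTING materialized_postgresql_max_block_size = 65536.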
+ for (const auto & command : alter_commands) + { + if (command.type != AlterCommand::MODIFY_DATABASE_SETTING) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Alter of type '{}' is not supported by databases", alterTypeToString(command.type)); + } + + for (const auto & command : alter_commands) + { + if (!command.ignore) + { + if (command.type == AlterCommand::MODIFY_DATABASE_SETTING) + database->applyNewSettings(command.settings_changes, getContext()); + else + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unsupported alter command"); + } + } } return res; diff --git a/src/Storages/AlterCommands.cpp b/src/Storages/AlterCommands.cpp index 6ee3db90724..e6d60a23863 100644 --- a/src/Storages/AlterCommands.cpp +++ b/src/Storages/AlterCommands.cpp @@ -358,24 +358,6 @@ std::optional AlterCommand::parse(const ASTAlterCommand * command_ } -void AlterCommands::apply(DatabasePtr database, ContextPtr context) const -{ - for (const AlterCommand & command : *this) - { - if (!command.ignore) - { - if (command.type == AlterCommand::MODIFY_DATABASE_SETTING) - { - database->tryApplySettings(command.settings_changes, context); - database->modifySettingsMetadata(command.settings_changes, context); - } - else - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unsupported alter command"); - } - } -} - - void AlterCommand::apply(StorageInMemoryMetadata & metadata, ContextPtr context) const { if (type == ADD_COLUMN) diff --git a/src/Storages/AlterCommands.h b/src/Storages/AlterCommands.h index a2a1a3b6709..ae1db10fb47 100644 --- a/src/Storages/AlterCommands.h +++ b/src/Storages/AlterCommands.h @@ -197,7 +197,7 @@ public: /// Commands have to be prepared before apply. void apply(StorageInMemoryMetadata & metadata, ContextPtr context) const; - void apply(DatabasePtr database, ContextPtr context) const; + /// At least one command modify settings. 
bool hasSettingsAlterCommand() const; diff --git a/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp b/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp index c992699a206..46033efc12e 100644 --- a/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp +++ b/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp @@ -29,7 +29,7 @@ MaterializedPostgreSQLConsumer::MaterializedPostgreSQLConsumer( bool allow_automatic_update_, Storages storages_, const String & name_for_logger) - : log(&Poco::Logger::get("PostgreSQLReplicaConsumer("+ name_for_logger +")")) + : log(&Poco::Logger::get("PostgreSQLReplicaConsumer(" + name_for_logger + ")")) , context(context_) , replication_slot_name(replication_slot_name_) , publication_name(publication_name_) diff --git a/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp b/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp index e763815626e..23ee7532b5e 100644 --- a/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp +++ b/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp @@ -500,14 +500,10 @@ void PostgreSQLReplicationHandler::shutdownFinal() dropReplicationSlot(tx, /* temporary */true); }); -<<<<<<< HEAD - connection.execWithRetry([&](pqxx::nontransaction & tx) -======= if (user_managed_slot) return; - connection->execWithRetry([&](pqxx::nontransaction & tx) ->>>>>>> 8588e4d9bb809f7ca29426934855567646d2bd01 + connection.execWithRetry([&](pqxx::nontransaction & tx) { if (isReplicationSlotExist(tx, last_committed_lsn, /* temporary */false)) dropReplicationSlot(tx, /* temporary */false); diff --git a/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp b/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp index cc5b857fc16..fdded7283c4 100644 --- a/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp +++ b/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp @@ -75,8 +75,7 @@ StorageMaterializedPostgreSQL::StorageMaterializedPostgreSQL( getContext(), is_attach, *replication_settings, - /* is_materialized_postgresql_database */false, - remote_table_name); + /* is_materialized_postgresql_database */false); if (!is_attach) { diff --git a/tests/integration/test_postgresql_replica_database_engine/test.py b/tests/integration/test_postgresql_replica_database_engine/test.py index cef502be0fe..aa9be8e0244 100644 --- a/tests/integration/test_postgresql_replica_database_engine/test.py +++ b/tests/integration/test_postgresql_replica_database_engine/test.py @@ -985,6 +985,10 @@ def test_user_managed_slots(started_cluster): def test_add_new_table_to_replication(started_cluster): drop_materialized_db() + conn = get_postgres_conn(ip=started_cluster.postgres_ip, + port=started_cluster.postgres_port, + database=True) + cursor = conn.cursor() NUM_TABLES = 5 for i in range(NUM_TABLES): From f26da04cdfe53fb5238baaab2a54bf5698e6936c Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 9 Sep 2021 09:53:01 +0300 Subject: [PATCH 23/80] Fix checks --- src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.cpp | 5 ++--- src/Interpreters/InterpreterAlterQuery.cpp | 2 +- src/Storages/AlterCommands.h | 2 -- tests/integration/test_grant_and_revoke/test.py | 2 +- 4 files changed, 4 insertions(+), 7 deletions(-) diff --git a/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.cpp b/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.cpp index 0e441efc428..42720fa4eb1 100644 --- a/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.cpp +++ b/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.cpp @@ -30,7 +30,6 @@ 
namespace DB namespace ErrorCodes { - extern const int NOT_IMPLEMENTED; extern const int LOGICAL_ERROR; extern const int QUERY_NOT_ALLOWED; extern const int UNKNOWN_TABLE; @@ -313,8 +312,8 @@ void DatabaseMaterializedPostgreSQL::attachTable(const String & table_name, cons StoragePtr DatabaseMaterializedPostgreSQL::detachTable(const String & table_name) { - /// If there is query context then we need to dettach materialized storage. - /// If there is no query context then we need to dettach internal storage from atomic database. + /// If there is query context then we need to detach materialized storage. + /// If there is no query context then we need to detach internal storage from atomic database. if (CurrentThread::isInitialized() && CurrentThread::get().getQueryContext()) { auto & table_to_delete = materialized_tables[table_name]; diff --git a/src/Interpreters/InterpreterAlterQuery.cpp b/src/Interpreters/InterpreterAlterQuery.cpp index 8a23acdefc4..fc519124a47 100644 --- a/src/Interpreters/InterpreterAlterQuery.cpp +++ b/src/Interpreters/InterpreterAlterQuery.cpp @@ -199,7 +199,7 @@ BlockIO InterpreterAlterQuery::executeToDatabase(const ASTAlterQuery & alter) if (command.type == AlterCommand::MODIFY_DATABASE_SETTING) database->applyNewSettings(command.settings_changes, getContext()); else - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unsupported alter command"); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unsupported alter command"); } } } diff --git a/src/Storages/AlterCommands.h b/src/Storages/AlterCommands.h index ae1db10fb47..ad2647c321f 100644 --- a/src/Storages/AlterCommands.h +++ b/src/Storages/AlterCommands.h @@ -197,8 +197,6 @@ public: /// Commands have to be prepared before apply. void apply(StorageInMemoryMetadata & metadata, ContextPtr context) const; - - /// At least one command modify settings. 
bool hasSettingsAlterCommand() const; diff --git a/tests/integration/test_grant_and_revoke/test.py b/tests/integration/test_grant_and_revoke/test.py index 79fe4bf9f41..b905e4df219 100644 --- a/tests/integration/test_grant_and_revoke/test.py +++ b/tests/integration/test_grant_and_revoke/test.py @@ -151,7 +151,7 @@ def test_grant_all_on_table(): instance.query("GRANT ALL ON test.table TO A WITH GRANT OPTION") instance.query("GRANT ALL ON test.table TO B", user='A') assert instance.query( - "SHOW GRANTS FOR B") == "GRANT SHOW TABLES, SHOW COLUMNS, SHOW DICTIONARIES, SELECT, INSERT, ALTER, CREATE TABLE, CREATE VIEW, CREATE DICTIONARY, DROP TABLE, DROP VIEW, DROP DICTIONARY, TRUNCATE, OPTIMIZE, SYSTEM MERGES, SYSTEM TTL MERGES, SYSTEM FETCHES, SYSTEM MOVES, SYSTEM SENDS, SYSTEM REPLICATION QUEUES, SYSTEM DROP REPLICA, SYSTEM SYNC REPLICA, SYSTEM RESTART REPLICA, SYSTEM RESTORE REPLICA, SYSTEM FLUSH DISTRIBUTED, dictGet ON test.table TO B\n" + "SHOW GRANTS FOR B") == "GRANT SHOW TABLES, SHOW COLUMNS, SHOW DICTIONARIES, SELECT, INSERT, ALTER TABLE, ALTER VIEW, CREATE TABLE, CREATE VIEW, CREATE DICTIONARY, DROP TABLE, DROP VIEW, DROP DICTIONARY, TRUNCATE, OPTIMIZE, SYSTEM MERGES, SYSTEM TTL MERGES, SYSTEM FETCHES, SYSTEM MOVES, SYSTEM SENDS, SYSTEM REPLICATION QUEUES, SYSTEM DROP REPLICA, SYSTEM SYNC REPLICA, SYSTEM RESTART REPLICA, SYSTEM RESTORE REPLICA, SYSTEM FLUSH DISTRIBUTED, dictGet ON test.table TO B\n" instance.query("REVOKE ALL ON test.table FROM B", user='A') assert instance.query("SHOW GRANTS FOR B") == "" From ee8cb9300426e71d07edf19da57c1c7886433d4b Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Mon, 13 Sep 2021 15:19:37 +0300 Subject: [PATCH 24/80] add function zookeeperSessionUptime() --- src/Common/ZooKeeper/ZooKeeper.h | 5 ++ src/Functions/FunctionServerConstantBase.h | 57 ++++++++++++++++++ src/Functions/buildId.cpp | 58 +++--------------- src/Functions/hostName.cpp | 59 +++---------------- .../registerFunctionsMiscellaneous.cpp | 2 + src/Functions/serverUUID.cpp | 45 ++------------ src/Functions/tcpPort.cpp | 46 +++------------ src/Functions/timezone.cpp | 55 +++-------------- src/Functions/uptime.cpp | 58 ++++-------------- src/Functions/version.cpp | 55 ++++------------- src/Functions/zookeeperSessionUptime.cpp | 25 ++++++++ src/Interpreters/Context.cpp | 8 +++ src/Interpreters/Context.h | 2 + 13 files changed, 159 insertions(+), 316 deletions(-) create mode 100644 src/Functions/FunctionServerConstantBase.h create mode 100644 src/Functions/zookeeperSessionUptime.cpp diff --git a/src/Common/ZooKeeper/ZooKeeper.h b/src/Common/ZooKeeper/ZooKeeper.h index bfbfea03aae..27dfdad7cdd 100644 --- a/src/Common/ZooKeeper/ZooKeeper.h +++ b/src/Common/ZooKeeper/ZooKeeper.h @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -277,6 +278,8 @@ public: void setZooKeeperLog(std::shared_ptr zk_log_); + UInt32 getSessionUptime() const { return session_uptime.elapsedSeconds(); } + private: friend class EphemeralNodeHolder; @@ -307,6 +310,8 @@ private: Poco::Logger * log = nullptr; std::shared_ptr zk_log; + + AtomicStopwatch session_uptime; }; diff --git a/src/Functions/FunctionServerConstantBase.h b/src/Functions/FunctionServerConstantBase.h new file mode 100644 index 00000000000..2dbc427dcf7 --- /dev/null +++ b/src/Functions/FunctionServerConstantBase.h @@ -0,0 +1,57 @@ +#pragma once +#include +#include +#include + + +namespace DB +{ + +/// Base class for functions which return server-level constant like version() or uptime() +template +class 
FunctionServerConstantBase : public IFunction
+{
+public:
+    static constexpr auto name = func_name;
+
+    explicit FunctionServerConstantBase(ContextPtr context, T && value_)
+        : is_distributed(context->isDistributed())
+        , value(std::forward<T>(value_))
+    {
+    }
+
+    String getName() const override
+    {
+        return name;
+    }
+
+    size_t getNumberOfArguments() const override
+    {
+        return 0;
+    }
+
+    DataTypePtr getReturnTypeImpl(const DataTypes & /*arguments*/) const override
+    {
+        return std::make_shared<ColumnT>();
+    }
+
+    bool isDeterministic() const override { return false; }
+    bool isDeterministicInScopeOfQuery() const override { return true; }
+
+    /// Function may return different values on different shards/replicas, so it's not constant for a distributed query
+    bool isSuitableForConstantFolding() const override { return !is_distributed; }
+
+    bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; }
+
+    ColumnPtr executeImpl(const ColumnsWithTypeAndName &, const DataTypePtr &, size_t input_rows_count) const override
+    {
+        return ColumnT().createColumnConst(input_rows_count, value);
+    }
+
+private:
+    bool is_distributed;
+    T value;
+};
+
+}
+
diff --git a/src/Functions/buildId.cpp b/src/Functions/buildId.cpp
index 047bddeed9b..40223cf0add 100644
--- a/src/Functions/buildId.cpp
+++ b/src/Functions/buildId.cpp
@@ -1,65 +1,23 @@
 #if defined(__ELF__) && !defined(__FreeBSD__)
 
-#include
-#include
+#include
 #include
 #include
-#include
-#include
-
 namespace DB
 {
 namespace
 {
+    constexpr char name[] = "buildId";
 
-/** buildId() - returns the compiler build id of the running binary.
-  */
-class FunctionBuildId : public IFunction
-{
-public:
-    static constexpr auto name = "buildId";
-    static FunctionPtr create(ContextPtr context)
+    /// buildId() - returns the compiler build id of the running binary.
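+    /// For example: SELECT buildId(); -- one hex string per server binary, so replicas built
+    /// from different commits may legitimately return different values in a distributed query.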
+    class FunctionBuildId : public FunctionServerConstantBase
     {
-        return std::make_shared<FunctionBuildId>(context->isDistributed());
-    }
-
-    explicit FunctionBuildId(bool is_distributed_) : is_distributed(is_distributed_)
-    {
-    }
-
-    String getName() const override
-    {
-        return name;
-    }
-
-    size_t getNumberOfArguments() const override
-    {
-        return 0;
-    }
-
-    bool isDeterministic() const override { return false; }
-    bool isDeterministicInScopeOfQuery() const override { return true; }
-    bool isSuitableForConstantFolding() const override { return !is_distributed; }
-    bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override
-    {
-        return false;
-    }
-
-    DataTypePtr getReturnTypeImpl(const DataTypes & /*arguments*/) const override
-    {
-        return std::make_shared<DataTypeString>();
-    }
-
-    ColumnPtr executeImpl(const ColumnsWithTypeAndName &, const DataTypePtr &, size_t input_rows_count) const override
-    {
-        return DataTypeString().createColumnConst(input_rows_count, SymbolIndex::instance()->getBuildIDHex());
-    }
-
-private:
-    bool is_distributed;
-};
+    public:
+        static FunctionPtr create(ContextPtr context) { return std::make_shared<FunctionBuildId>(context); }
+        explicit FunctionBuildId(ContextPtr context) : FunctionServerConstantBase(context, SymbolIndex::instance()->getBuildIDHex()) {}
+    };
 }
 
 void registerFunctionBuildId(FunctionFactory & factory)
diff --git a/src/Functions/hostName.cpp b/src/Functions/hostName.cpp
index 2739b37e175..564f68a911d 100644
--- a/src/Functions/hostName.cpp
+++ b/src/Functions/hostName.cpp
@@ -1,64 +1,21 @@
-#include
-#include
+#include
 #include
 #include
-#include
-#include
-
 namespace DB
 {
 namespace
 {
+    constexpr char name[] = "hostName";
 
-/// Get the host name. Is is constant on single server, but is not constant in distributed queries.
-class FunctionHostName : public IFunction
-{
-public:
-    static constexpr auto name = "hostName";
-    static FunctionPtr create(ContextPtr context)
+    /// Get the host name. It is constant on a single server, but is not constant in distributed queries.
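+    /// For example: SELECT hostName() FROM clusterAllReplicas('some_cluster', system.one);
+    /// returns one row per replica, each carrying that replica's own host name
+    /// ('some_cluster' is just an illustrative placeholder for a configured cluster).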
+ class FunctionHostName : public FunctionServerConstantBase { - return std::make_shared(context->isDistributed()); - } - - explicit FunctionHostName(bool is_distributed_) : is_distributed(is_distributed_) - { - } - - String getName() const override - { - return name; - } - - bool isDeterministic() const override { return false; } - - bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } - - bool isDeterministicInScopeOfQuery() const override - { - return true; - } - - bool isSuitableForConstantFolding() const override { return !is_distributed; } - - size_t getNumberOfArguments() const override - { - return 0; - } - - DataTypePtr getReturnTypeImpl(const DataTypes & /*arguments*/) const override - { - return std::make_shared(); - } - - ColumnPtr executeImpl(const ColumnsWithTypeAndName &, const DataTypePtr & result_type, size_t input_rows_count) const override - { - return result_type->createColumnConst(input_rows_count, DNSResolver::instance().getHostName()); - } -private: - bool is_distributed; -}; + public: + static FunctionPtr create(ContextPtr context) { return std::make_shared(context); } + explicit FunctionHostName(ContextPtr context) : FunctionServerConstantBase(context, DNSResolver::instance().getHostName()) {} + }; } void registerFunctionHostName(FunctionFactory & factory) diff --git a/src/Functions/registerFunctionsMiscellaneous.cpp b/src/Functions/registerFunctionsMiscellaneous.cpp index 04561203c67..dfd986c5f82 100644 --- a/src/Functions/registerFunctionsMiscellaneous.cpp +++ b/src/Functions/registerFunctionsMiscellaneous.cpp @@ -80,6 +80,7 @@ void registerFunctionIsIPAddressContainedIn(FunctionFactory &); void registerFunctionQueryID(FunctionFactory & factory); void registerFunctionInitialQueryID(FunctionFactory & factory); void registerFunctionServerUUID(FunctionFactory &); +void registerFunctionZooKeeperSessionUptime(FunctionFactory &); #if USE_ICU void registerFunctionConvertCharset(FunctionFactory &); @@ -160,6 +161,7 @@ void registerFunctionsMiscellaneous(FunctionFactory & factory) registerFunctionQueryID(factory); registerFunctionInitialQueryID(factory); registerFunctionServerUUID(factory); + registerFunctionZooKeeperSessionUptime(factory); #if USE_ICU registerFunctionConvertCharset(factory); diff --git a/src/Functions/serverUUID.cpp b/src/Functions/serverUUID.cpp index 4b70b1576ac..4d353a81df2 100644 --- a/src/Functions/serverUUID.cpp +++ b/src/Functions/serverUUID.cpp @@ -1,54 +1,21 @@ -#include +#include #include -#include -#include - +#include namespace DB { namespace { + constexpr char name[] = "serverUUID"; -class FunctionServerUUID : public IFunction + class FunctionServerUUID : public FunctionServerConstantBase { public: - static constexpr auto name = "serverUUID"; + static FunctionPtr create(ContextPtr context) { return std::make_shared(context); } - static FunctionPtr create(ContextPtr context) - { - return std::make_shared(context->isDistributed(), ServerUUID::get()); - } - - explicit FunctionServerUUID(bool is_distributed_, UUID server_uuid_) - : is_distributed(is_distributed_), server_uuid(server_uuid_) - { - } - - String getName() const override { return name; } - - size_t getNumberOfArguments() const override { return 0; } - - DataTypePtr getReturnTypeImpl(const DataTypes &) const override { return std::make_shared(); } - - bool isDeterministic() const override { return false; } - - bool isDeterministicInScopeOfQuery() const override { return true; } - - bool 
isSuitableForConstantFolding() const override { return !is_distributed; } - - bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo &) const override { return false; } - - ColumnPtr executeImpl(const ColumnsWithTypeAndName &, const DataTypePtr &, size_t input_rows_count) const override - { - return DataTypeUUID().createColumnConst(input_rows_count, server_uuid); - } - - private: - bool is_distributed; - const UUID server_uuid; + explicit FunctionServerUUID(ContextPtr context) : FunctionServerConstantBase(context, ServerUUID::get()) {} }; - } void registerFunctionServerUUID(FunctionFactory & factory) diff --git a/src/Functions/tcpPort.cpp b/src/Functions/tcpPort.cpp index 10b89faa1be..b2e46d929cf 100644 --- a/src/Functions/tcpPort.cpp +++ b/src/Functions/tcpPort.cpp @@ -1,52 +1,20 @@ +#include #include -#include -#include - namespace DB { namespace { + constexpr char name[] = "tcpPort"; -class FunctionTcpPort : public IFunction -{ -public: - static constexpr auto name = "tcpPort"; - - static FunctionPtr create(ContextPtr context) + class FunctionTcpPort : public FunctionServerConstantBase { - return std::make_shared(context->isDistributed(), context->getTCPPort()); - } - - explicit FunctionTcpPort(bool is_distributed_, UInt16 port_) : is_distributed(is_distributed_), port(port_) - { - } - - String getName() const override { return name; } - - size_t getNumberOfArguments() const override { return 0; } - - DataTypePtr getReturnTypeImpl(const DataTypes &) const override { return std::make_shared(); } - - bool isDeterministic() const override { return false; } - - bool isDeterministicInScopeOfQuery() const override { return true; } - - bool isSuitableForConstantFolding() const override { return !is_distributed; } - - bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } - - ColumnPtr executeImpl(const ColumnsWithTypeAndName &, const DataTypePtr &, size_t input_rows_count) const override - { - return DataTypeUInt16().createColumnConst(input_rows_count, port); - } - -private: - bool is_distributed; - const UInt64 port; -}; + public: + static FunctionPtr create(ContextPtr context) { return std::make_shared(context); } + explicit FunctionTcpPort(ContextPtr context) : FunctionServerConstantBase(context, context->getTCPPort()) {} + }; } void registerFunctionTcpPort(FunctionFactory & factory) diff --git a/src/Functions/timezone.cpp b/src/Functions/timezone.cpp index 3b2319c22ca..9694d22c8b8 100644 --- a/src/Functions/timezone.cpp +++ b/src/Functions/timezone.cpp @@ -1,59 +1,22 @@ -#include -#include -#include -#include +#include #include -#include +#include namespace DB { namespace { + constexpr char name[] = "timezone"; -/** Returns the server time zone. - */ -class FunctionTimezone : public IFunction -{ -public: - static constexpr auto name = "timezone"; - static FunctionPtr create(ContextPtr context) + /// Returns the server time zone. 
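+    /// For example: SELECT timezone(); -- e.g. 'UTC', i.e. whatever time zone DateLUT was initialized with.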
+ class FunctionTimezone : public FunctionServerConstantBase { - return std::make_shared(context->isDistributed()); - } - - explicit FunctionTimezone(bool is_distributed_) : is_distributed(is_distributed_) - { - } - - String getName() const override - { - return name; - } - size_t getNumberOfArguments() const override - { - return 0; - } - - DataTypePtr getReturnTypeImpl(const DataTypes & /*arguments*/) const override - { - return std::make_shared(); - } - - bool isDeterministic() const override { return false; } - bool isDeterministicInScopeOfQuery() const override { return true; } - bool isSuitableForConstantFolding() const override { return !is_distributed; } - - bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } - - ColumnPtr executeImpl(const ColumnsWithTypeAndName &, const DataTypePtr &, size_t input_rows_count) const override - { - return DataTypeString().createColumnConst(input_rows_count, DateLUT::instance().getTimeZone()); - } -private: - bool is_distributed; -}; + public: + static FunctionPtr create(ContextPtr context) { return std::make_shared(context); } + explicit FunctionTimezone(ContextPtr context) : FunctionServerConstantBase(context, String{DateLUT::instance().getTimeZone()}) {} + }; } void registerFunctionTimezone(FunctionFactory & factory) diff --git a/src/Functions/uptime.cpp b/src/Functions/uptime.cpp index bb767101fea..9c2a9d35762 100644 --- a/src/Functions/uptime.cpp +++ b/src/Functions/uptime.cpp @@ -1,58 +1,22 @@ -#include -#include +#include #include -#include - namespace DB { -/** Returns server uptime in seconds. - */ -class FunctionUptime : public IFunction +namespace { -public: - static constexpr auto name = "uptime"; - static FunctionPtr create(ContextPtr context) + constexpr char name[] = "uptime"; + + /// Returns server uptime in seconds. 
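+    /// For example: SELECT uptime(); -- whole seconds since server start; the value differs
+    /// between replicas, hence no constant folding for distributed queries.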
+ class FunctionUptime : public FunctionServerConstantBase { - return std::make_shared(context->isDistributed(), context->getUptimeSeconds()); - } - - explicit FunctionUptime(bool is_distributed_, time_t uptime_) : is_distributed(is_distributed_), uptime(uptime_) - { - } - - String getName() const override - { - return name; - } - - size_t getNumberOfArguments() const override - { - return 0; - } - - DataTypePtr getReturnTypeImpl(const DataTypes & /*arguments*/) const override - { - return std::make_shared(); - } - - bool isDeterministic() const override { return false; } - bool isDeterministicInScopeOfQuery() const override { return true; } - bool isSuitableForConstantFolding() const override { return !is_distributed; } - - bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } - - ColumnPtr executeImpl(const ColumnsWithTypeAndName &, const DataTypePtr &, size_t input_rows_count) const override - { - return DataTypeUInt32().createColumnConst(input_rows_count, static_cast(uptime)); - } - -private: - bool is_distributed; - time_t uptime; -}; + public: + static FunctionPtr create(ContextPtr context) { return std::make_shared(context); } + explicit FunctionUptime(ContextPtr context) : FunctionServerConstantBase(context, context->getUptimeSeconds()) {} + }; +} void registerFunctionUptime(FunctionFactory & factory) { diff --git a/src/Functions/version.cpp b/src/Functions/version.cpp index 81e40655eef..d59774bd23d 100644 --- a/src/Functions/version.cpp +++ b/src/Functions/version.cpp @@ -1,8 +1,5 @@ -#include -#include +#include #include -#include -#include #if !defined(ARCADIA_BUILD) # include @@ -11,49 +8,19 @@ namespace DB { -/** version() - returns the current version as a string. - */ -class FunctionVersion : public IFunction +namespace { -public: - static constexpr auto name = "version"; - static FunctionPtr create(ContextPtr context) + constexpr char name[] = "version"; + + /// version() - returns the current version as a string. 
+ class FunctionVersion : public FunctionServerConstantBase { - return std::make_shared(context->isDistributed()); - } - - explicit FunctionVersion(bool is_distributed_) : is_distributed(is_distributed_) - { - } - - String getName() const override - { - return name; - } - - bool isDeterministic() const override { return false; } - bool isDeterministicInScopeOfQuery() const override { return true; } - bool isSuitableForConstantFolding() const override { return !is_distributed; } - bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } - - size_t getNumberOfArguments() const override - { - return 0; - } - - DataTypePtr getReturnTypeImpl(const DataTypes & /*arguments*/) const override - { - return std::make_shared(); - } - - ColumnPtr executeImpl(const ColumnsWithTypeAndName &, const DataTypePtr &, size_t input_rows_count) const override - { - return DataTypeString().createColumnConst(input_rows_count, VERSION_STRING); - } -private: - bool is_distributed; -}; + public: + static FunctionPtr create(ContextPtr context) { return std::make_shared(context); } + explicit FunctionVersion(ContextPtr context) : FunctionServerConstantBase(context, VERSION_STRING) {} + }; +} void registerFunctionVersion(FunctionFactory & factory) { diff --git a/src/Functions/zookeeperSessionUptime.cpp b/src/Functions/zookeeperSessionUptime.cpp new file mode 100644 index 00000000000..b5163b7b22a --- /dev/null +++ b/src/Functions/zookeeperSessionUptime.cpp @@ -0,0 +1,25 @@ +#include +#include + + +namespace DB +{ +namespace +{ + constexpr char name[] = "zookeeperSessionUptime"; + + class FunctionZooKeeperSessionUptime : public FunctionServerConstantBase + { + public: + FunctionZooKeeperSessionUptime(ContextPtr context) : FunctionServerConstantBase(context, context->getZooKeeperSessionUptime()) {} + + static FunctionPtr create(ContextPtr context) { return std::make_shared(context); } + }; +} + +void registerFunctionZooKeeperSessionUptime(FunctionFactory & factory) +{ + factory.registerFunction(); +} + +} diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index f0ad46faf9a..505e6f205d9 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -1665,6 +1665,14 @@ zkutil::ZooKeeperPtr Context::getZooKeeper() const return shared->zookeeper; } +UInt32 Context::getZooKeeperSessionUptime() const +{ + std::lock_guard lock(shared->zookeeper_mutex); + if (!shared->zookeeper || shared->zookeeper->expired()) + return 0; + return shared->zookeeper->getSessionUptime(); +} + void Context::setSystemZooKeeperLogAfterInitializationIfNeeded() { /// It can be nearly impossible to understand in which order global objects are initialized on server startup. diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index e186b81101a..9c5cedb7a7a 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -652,6 +652,8 @@ public: /// Same as above but return a zookeeper connection from auxiliary_zookeepers configuration entry. 
std::shared_ptr getAuxiliaryZooKeeper(const String & name) const; + UInt32 getZooKeeperSessionUptime() const; + #if USE_NURAFT std::shared_ptr & getKeeperDispatcher() const; #endif From be45ed5fde8bc0d6b1933943a557aa9f063763d2 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Mon, 13 Sep 2021 16:36:05 +0300 Subject: [PATCH 25/80] better check for session expiration in clickhouse-test --- tests/clickhouse-test | 45 +++++++++++++++++++++++++++++++------------ 1 file changed, 33 insertions(+), 12 deletions(-) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index c3136c4fd52..b6a3a3e3106 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -9,6 +9,7 @@ import re import json import copy import traceback +import math from argparse import ArgumentParser from typing import Tuple, Union, Optional, TextIO @@ -43,19 +44,10 @@ except ImportError: DISTRIBUTED_DDL_TIMEOUT_MSG = "is executing longer than distributed_ddl_task_timeout" MESSAGES_TO_RETRY = [ - "DB::Exception: ZooKeeper session has been expired", - "Coordination::Exception: Session expired", - "Coordination::Exception: Connection loss", - "Coordination::Exception: Operation timeout", - "DB::Exception: Session expired", - "DB::Exception: Connection loss", - "DB::Exception: Operation timeout", - "Operation timed out", "ConnectionPoolWithFailover: Connection failed at try", "DB::Exception: New table appeared in database being dropped or detached. Try again", "is already started to be removing by another replica right now", "DB::Exception: Cannot enqueue query", - "Shutdown is called for table", # It happens in SYSTEM SYNC REPLICA query if session with ZooKeeper is being reinitialized. DISTRIBUTED_DDL_TIMEOUT_MSG # FIXME ] @@ -295,7 +287,32 @@ def run_single_test(args, ext, server_logs_level, client_options, case_file, std return proc, stdout, stderr, total_time -def need_retry(stdout, stderr): +def get_zookeeper_session_uptime(args): + try: + query = b"SELECT zookeeperSessionUptime()" + + if args.replicated_database: + query = b"SELECT min(materialize(zookeeperSessionUptime())) " \ + b"FROM clusterAllReplicas('test_cluster_database_replicated', system.one) " + + clickhouse_proc = open_client_process(args.client) + + (stdout, _) = clickhouse_proc.communicate((query), timeout=20) + + return int(stdout.decode('utf-8').strip()) + except Exception as ex: + print("Exception", ex) + return None + + +def need_retry(args, stdout, stderr, total_time): + # Sometimes we may get unexpected exception like "Replica is readonly" or "Shutdown is called for table" + # instead of "Session expired" or "Connection loss" + # Retry if session was expired during test execution + session_uptime = get_zookeeper_session_uptime(args) + if session_uptime is not None and session_uptime < math.ceil(total_time): + return True + return any(msg in stdout for msg in MESSAGES_TO_RETRY) or any(msg in stderr for msg in MESSAGES_TO_RETRY) @@ -597,7 +614,7 @@ def run_tests_array(all_tests_with_params): status += 'Database: ' + testcase_args.testcase_database else: counter = 1 - while need_retry(stdout, stderr): + while need_retry(args, stdout, stderr, total_time): restarted_tests.append((case_file, stderr)) testcase_args = configure_testcase_args(args, case_file, suite_tmp_dir, stderr_file) proc, stdout, stderr, total_time = run_single_test(testcase_args, ext, server_logs_level, client_options, case_file, stdout_file, stderr_file) @@ -1102,13 +1119,17 @@ def main(args): def create_common_database(args, db_name): create_database_retries = 0 while 
create_database_retries < MAX_RETRIES: + start_time = datetime.now() + client_cmd = args.client + " " + get_additional_client_options(args) clickhouse_proc_create = open_client_process(client_cmd, universal_newlines=True) (stdout, stderr) = clickhouse_proc_create.communicate(("CREATE DATABASE IF NOT EXISTS " + db_name + get_db_engine(args, db_name))) - if not need_retry(stdout, stderr): + total_time = (datetime.now() - start_time).total_seconds() + + if not need_retry(args, stdout, stderr, total_time): break create_database_retries += 1 From df56e99b87f63b5650057d7ecbad597354f2867f Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Mon, 13 Sep 2021 22:11:16 +0300 Subject: [PATCH 26/80] fixes after review --- src/Backups/renameInCreateQuery.cpp | 25 ++--- src/Core/QualifiedTableName.h | 47 +++++++++ src/Databases/DDLDependencyVisitor.cpp | 55 ++++------- src/Databases/DDLDependencyVisitor.h | 5 +- src/Databases/DatabaseAtomic.cpp | 10 +- src/Databases/DatabaseAtomic.h | 4 +- src/Databases/DatabaseLazy.cpp | 2 +- src/Databases/DatabaseLazy.h | 2 +- src/Databases/DatabaseOnDisk.cpp | 4 +- src/Databases/DatabaseOnDisk.h | 2 +- src/Databases/DatabaseOrdinary.cpp | 40 ++++---- src/Databases/DatabaseOrdinary.h | 2 +- src/Databases/DatabaseReplicated.cpp | 8 +- src/Databases/DatabaseReplicated.h | 4 +- src/Databases/IDatabase.h | 4 +- src/Databases/TablesLoader.cpp | 98 ++++++++++--------- src/Databases/TablesLoader.h | 28 +++--- .../PostgreSQLDictionarySource.cpp | 17 ++-- src/Dictionaries/XDBCDictionarySource.cpp | 17 +--- .../getDictionaryConfigurationFromAST.cpp | 27 ++++- .../getDictionaryConfigurationFromAST.h | 9 ++ src/Functions/FunctionJoinGet.cpp | 21 +--- .../ExternalDictionariesLoader.cpp | 45 ++++----- src/Interpreters/ExternalDictionariesLoader.h | 2 +- src/Interpreters/loadMetadata.h | 2 + src/TableFunctions/TableFunctionRemote.cpp | 11 +-- 26 files changed, 272 insertions(+), 219 deletions(-) diff --git a/src/Backups/renameInCreateQuery.cpp b/src/Backups/renameInCreateQuery.cpp index a36995654ee..5d99ea585b5 100644 --- a/src/Backups/renameInCreateQuery.cpp +++ b/src/Backups/renameInCreateQuery.cpp @@ -160,26 +160,29 @@ namespace if (args.size() <= db_name_index) return; - String db_name = evaluateConstantExpressionForDatabaseName(args[db_name_index], data.context)->as().value.safeGet(); + String name = evaluateConstantExpressionForDatabaseName(args[db_name_index], data.context)->as().value.safeGet(); - String table_name; size_t table_name_index = static_cast(-1); - size_t dot = String::npos; - if (function.name != "Distributed") - dot = db_name.find('.'); - if (dot != String::npos) - { - table_name = db_name.substr(dot + 1); - db_name.resize(dot); - } + + QualifiedTableName qualified_name; + + if (function.name == "Distributed") + qualified_name.table = name; else + qualified_name = QualifiedTableName::parseFromString(name); + + if(qualified_name.database.empty()) { + std::swap(qualified_name.database, qualified_name.table); table_name_index = 2; if (args.size() <= table_name_index) return; - table_name = evaluateConstantExpressionForDatabaseName(args[table_name_index], data.context)->as().value.safeGet(); + qualified_name.table = evaluateConstantExpressionForDatabaseName(args[table_name_index], data.context)->as().value.safeGet(); } + const String & db_name = qualified_name.database; + const String & table_name = qualified_name.table; + if (db_name.empty() || table_name.empty()) return; diff --git a/src/Core/QualifiedTableName.h b/src/Core/QualifiedTableName.h index 
2b48d38ca2f..dd043b86ee1 100644
--- a/src/Core/QualifiedTableName.h
+++ b/src/Core/QualifiedTableName.h
@@ -2,6 +2,8 @@
 
 #include
 #include
+#include
+#include
 #include
 #include
 #include
@@ -9,6 +11,11 @@
 namespace DB
 {
 
+namespace ErrorCodes
+{
+extern const int SYNTAX_ERROR;
+}
+
 //TODO replace with StorageID
 struct QualifiedTableName
 {
@@ -32,6 +39,46 @@ struct QualifiedTableName
         hash_state.update(table.data(), table.size());
         return hash_state.get64();
     }
+
+    /// NOTE: It's different from compound identifier parsing and does not support escaping or dots in names.
+    /// Usually it's better to use ParserIdentifier instead,
+    /// but we parse DDL dictionary names (and similar things) this way for historical reasons.
+    static std::optional<QualifiedTableName> tryParseFromString(const String & maybe_qualified_name)
+    {
+        if (maybe_qualified_name.empty())
+            return {};
+
+        /// Do not allow a dot at the beginning or at the end
+        auto pos = maybe_qualified_name.find('.');
+        if (pos == 0 || pos == (maybe_qualified_name.size() - 1))
+            return {};
+
+        QualifiedTableName name;
+        if (pos == std::string::npos)
+        {
+            name.table = std::move(maybe_qualified_name);
+        }
+        else if (maybe_qualified_name.find('.', pos + 1) != std::string::npos)
+        {
+            /// Do not allow multiple dots
+            return {};
+        }
+        else
+        {
+            name.database = maybe_qualified_name.substr(0, pos);
+            name.table = maybe_qualified_name.substr(pos + 1);
+        }
+
+        return name;
+    }
+
+    static QualifiedTableName parseFromString(const String & maybe_qualified_name)
+    {
+        auto name = tryParseFromString(maybe_qualified_name);
+        if (!name)
+            throw Exception(ErrorCodes::SYNTAX_ERROR, "Invalid qualified name: {}", maybe_qualified_name);
+        return *name;
+    }
 };
 
 }
diff --git a/src/Databases/DDLDependencyVisitor.cpp b/src/Databases/DDLDependencyVisitor.cpp
index 2b70421641b..0399ec59b16 100644
--- a/src/Databases/DDLDependencyVisitor.cpp
+++ b/src/Databases/DDLDependencyVisitor.cpp
@@ -4,8 +4,6 @@
 #include
 #include
 #include
 #include
-#include
-#include
 #include
 
 namespace DB
@@ -13,6 +11,7 @@ namespace DB
 
 void DDLDependencyVisitor::visit(const ASTPtr & ast, Data & data)
 {
+    /// Looking for functions in column default expressions and dictionary source definition
     if (const auto * function = ast->as<ASTFunction>())
         visit(*function, data);
     else if (const auto * dict_source = ast->as<ASTFunctionWithKeyValueArguments>())
         visit(*dict_source, data);
@@ -48,20 +47,14 @@ void DDLDependencyVisitor::visit(const ASTFunctionWithKeyValueArguments & dict_s
         return;
 
     auto config = getDictionaryConfigurationFromAST(data.create_query->as<ASTCreateQuery &>(), data.global_context);
-    String host = config->getString("dictionary.source.clickhouse.host", "");
-    UInt16 port = config->getUInt("dictionary.source.clickhouse.port", 0);
-    String database = config->getString("dictionary.source.clickhouse.db", "");
-    String table = config->getString("dictionary.source.clickhouse.table", "");
-    bool secure = config->getBool("dictionary.source.clickhouse.secure", false);
-    if (host.empty() || port == 0 || table.empty())
-        return;
 
-    UInt16 default_port = secure ?
data.global_context->getTCPPortSecure().value_or(0) : data.global_context->getTCPPort();
-    if (!isLocalAddress({host, port}, default_port))
+    auto info = getInfoIfClickHouseDictionarySource(config, data.global_context);
+
+    if (!info || !info->is_local)
         return;
 
-    if (database.empty())
-        database = data.default_database;
-    data.dependencies.emplace(QualifiedTableName{std::move(database), std::move(table)});
+    if (info->table_name.database.empty())
+        info->table_name.database = data.default_database;
+    data.dependencies.emplace(std::move(info->table_name));
 }
 
@@ -71,8 +64,7 @@ void DDLDependencyVisitor::extractTableNameFromArgument(const ASTFunction & func
     if (!function.arguments || function.arguments->children.size() <= arg_idx)
         return;
 
-    String database_name;
-    String table_name;
+    QualifiedTableName qualified_name;
 
     const auto * arg = function.arguments->as<ASTExpressionList>()->children[arg_idx].get();
     if (const auto * literal = arg->as<ASTLiteral>())
@@ -80,31 +72,22 @@ void DDLDependencyVisitor::extractTableNameFromArgument(const ASTFunction & func
         if (literal->value.getType() != Field::Types::String)
             return;
 
-        String maybe_qualified_name = literal->value.get<String>();
-        auto pos = maybe_qualified_name.find('.');
-        if (pos == 0 || pos == (maybe_qualified_name.size() - 1))
-        {
-            /// Most likely name is invalid
+        auto maybe_qualified_name = QualifiedTableName::tryParseFromString(literal->value.get<String>());
+        /// Just return if the name is invalid
+        if (!maybe_qualified_name)
             return;
-        }
-        else if (pos == std::string::npos)
-        {
-            table_name = std::move(maybe_qualified_name);
-        }
-        else
-        {
-            database_name = maybe_qualified_name.substr(0, pos);
-            table_name = maybe_qualified_name.substr(pos + 1);
-        }
+
+        qualified_name = std::move(*maybe_qualified_name);
     }
     else if (const auto * identifier = arg->as<ASTIdentifier>())
     {
         auto table_identifier = identifier->createTable();
+        /// Just return if the table identifier is invalid
         if (!table_identifier)
             return;
 
-        database_name = table_identifier->getDatabaseName();
-        table_name = table_identifier->shortName();
+        qualified_name.database = table_identifier->getDatabaseName();
+        qualified_name.table = table_identifier->shortName();
     }
     else
     {
@@ -112,9 +95,9 @@ void DDLDependencyVisitor::extractTableNameFromArgument(const ASTFunction & func
         return;
     }
 
-    if (database_name.empty())
-        database_name = data.default_database;
-    data.dependencies.emplace(QualifiedTableName{std::move(database_name), std::move(table_name)});
+    if (qualified_name.database.empty())
+        qualified_name.database = data.default_database;
+    data.dependencies.emplace(std::move(qualified_name));
 }
 
 }
diff --git a/src/Databases/DDLDependencyVisitor.h b/src/Databases/DDLDependencyVisitor.h
index 1d26adb6e6d..c0b39d70b08 100644
--- a/src/Databases/DDLDependencyVisitor.h
+++ b/src/Databases/DDLDependencyVisitor.h
@@ -9,7 +9,10 @@ namespace DB
 class ASTFunction;
 class ASTFunctionWithKeyValueArguments;
 
-
+/// Visits ASTCreateQuery and extracts names of table (or dictionary) dependencies
+/// from column default expressions (joinGet, dictGet, etc)
+/// or dictionary source (for dictionaries created from a local ClickHouse table).
+/// Does not validate the AST, works in a best-effort way.
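+/// For example, for a table defined (schema shown for illustration only) as
+///     CREATE TABLE db.t (id UInt64, s String DEFAULT dictGetString('db.dict', 'attr', id)) ENGINE = MergeTree ORDER BY id
+/// the visitor reports a dependency of db.t on db.dict, so the dictionary is loaded before the table.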
class DDLDependencyVisitor { public: diff --git a/src/Databases/DatabaseAtomic.cpp b/src/Databases/DatabaseAtomic.cpp index 83763ccd856..5c75f6f1036 100644 --- a/src/Databases/DatabaseAtomic.cpp +++ b/src/Databases/DatabaseAtomic.cpp @@ -416,9 +416,9 @@ UUID DatabaseAtomic::tryGetTableUUID(const String & table_name) const return UUIDHelpers::Nil; } -void DatabaseAtomic::beforeLoadingMetadata(ContextMutablePtr /*context*/, bool has_force_restore_data_flag, bool /*force_attach*/) +void DatabaseAtomic::beforeLoadingMetadata(ContextMutablePtr /*context*/, bool force_restore, bool /*force_attach*/) { - if (!has_force_restore_data_flag) + if (!force_restore) return; /// Recreate symlinks to table data dirs in case of force restore, because some of them may be broken @@ -435,10 +435,10 @@ void DatabaseAtomic::beforeLoadingMetadata(ContextMutablePtr /*context*/, bool h } void DatabaseAtomic::loadStoredObjects( - ContextMutablePtr local_context, bool has_force_restore_data_flag, bool force_attach, bool skip_startup_tables) + ContextMutablePtr local_context, bool force_restore, bool force_attach, bool skip_startup_tables) { - beforeLoadingMetadata(local_context, has_force_restore_data_flag, force_attach); - DatabaseOrdinary::loadStoredObjects(local_context, has_force_restore_data_flag, force_attach, skip_startup_tables); + beforeLoadingMetadata(local_context, force_restore, force_attach); + DatabaseOrdinary::loadStoredObjects(local_context, force_restore, force_attach, skip_startup_tables); } void DatabaseAtomic::startupTables(ThreadPool & thread_pool, bool force_restore, bool force_attach) diff --git a/src/Databases/DatabaseAtomic.h b/src/Databases/DatabaseAtomic.h index db9cef4dbc6..1fe13f8b27f 100644 --- a/src/Databases/DatabaseAtomic.h +++ b/src/Databases/DatabaseAtomic.h @@ -47,9 +47,9 @@ public: DatabaseTablesIteratorPtr getTablesIterator(ContextPtr context, const FilterByNameFunction & filter_by_table_name) const override; - void loadStoredObjects(ContextMutablePtr context, bool has_force_restore_data_flag, bool force_attach, bool skip_startup_tables) override; + void loadStoredObjects(ContextMutablePtr context, bool force_restore, bool force_attach, bool skip_startup_tables) override; - void beforeLoadingMetadata(ContextMutablePtr context, bool has_force_restore_data_flag, bool force_attach) override; + void beforeLoadingMetadata(ContextMutablePtr context, bool force_restore, bool force_attach) override; void startupTables(ThreadPool & thread_pool, bool force_restore, bool force_attach) override; diff --git a/src/Databases/DatabaseLazy.cpp b/src/Databases/DatabaseLazy.cpp index 7e0e1b7aa43..384c5ff47dd 100644 --- a/src/Databases/DatabaseLazy.cpp +++ b/src/Databases/DatabaseLazy.cpp @@ -36,7 +36,7 @@ DatabaseLazy::DatabaseLazy(const String & name_, const String & metadata_path_, void DatabaseLazy::loadStoredObjects( - ContextMutablePtr local_context, bool /* has_force_restore_data_flag */, bool /*force_attach*/, bool /* skip_startup_tables */) + ContextMutablePtr local_context, bool /* force_restore */, bool /*force_attach*/, bool /* skip_startup_tables */) { iterateMetadataFiles(local_context, [this](const String & file_name) { diff --git a/src/Databases/DatabaseLazy.h b/src/Databases/DatabaseLazy.h index bc79a49b2fe..45c816c2e76 100644 --- a/src/Databases/DatabaseLazy.h +++ b/src/Databases/DatabaseLazy.h @@ -26,7 +26,7 @@ public: bool canContainDistributedTables() const override { return false; } - void loadStoredObjects(ContextMutablePtr context, bool has_force_restore_data_flag, bool 
force_attach, bool skip_startup_tables) override; + void loadStoredObjects(ContextMutablePtr context, bool force_restore, bool force_attach, bool skip_startup_tables) override; void createTable( ContextPtr context, diff --git a/src/Databases/DatabaseOnDisk.cpp b/src/Databases/DatabaseOnDisk.cpp index 620e560b64c..40edeb5cd27 100644 --- a/src/Databases/DatabaseOnDisk.cpp +++ b/src/Databases/DatabaseOnDisk.cpp @@ -46,7 +46,7 @@ std::pair createTableFromAST( const String & database_name, const String & table_data_path_relative, ContextMutablePtr context, - bool has_force_restore_data_flag) + bool force_restore) { ast_create_query.attach = true; ast_create_query.database = database_name; @@ -88,7 +88,7 @@ std::pair createTableFromAST( context->getGlobalContext(), columns, constraints, - has_force_restore_data_flag) + force_restore) }; } diff --git a/src/Databases/DatabaseOnDisk.h b/src/Databases/DatabaseOnDisk.h index e7dda7cb36b..74056d887ae 100644 --- a/src/Databases/DatabaseOnDisk.h +++ b/src/Databases/DatabaseOnDisk.h @@ -16,7 +16,7 @@ std::pair createTableFromAST( const String & database_name, const String & table_data_path_relative, ContextMutablePtr context, - bool has_force_restore_data_flag); + bool force_restore); /** Get the string with the table definition based on the CREATE query. * It is an ATTACH query that you can execute to create a table from the correspondent database. diff --git a/src/Databases/DatabaseOrdinary.cpp b/src/Databases/DatabaseOrdinary.cpp index 4c73d3c30ff..1bdb273c9fb 100644 --- a/src/Databases/DatabaseOrdinary.cpp +++ b/src/Databases/DatabaseOrdinary.cpp @@ -39,7 +39,7 @@ namespace DatabaseOrdinary & database, const String & database_name, const String & metadata_path, - bool has_force_restore_data_flag) + bool force_restore) { try { @@ -48,7 +48,7 @@ namespace database_name, database.getTableDataPath(query), context, - has_force_restore_data_flag); + force_restore); database.attachTable(table_name, table, database.getTableDataPath(query)); } @@ -75,7 +75,7 @@ DatabaseOrdinary::DatabaseOrdinary( } void DatabaseOrdinary::loadStoredObjects( - ContextMutablePtr local_context, bool has_force_restore_data_flag, bool force_attach, bool skip_startup_tables) + ContextMutablePtr local_context, bool force_restore, bool force_attach, bool skip_startup_tables) { /** Tables load faster if they are loaded in sorted (by name) order. * Otherwise (for the ext4 filesystem), `DirectoryIterator` iterates through them in some order, @@ -85,7 +85,7 @@ void DatabaseOrdinary::loadStoredObjects( ParsedTablesMetadata metadata; loadTablesMetadata(local_context, metadata); - size_t total_tables = metadata.metadata.size() - metadata.total_dictionaries; + size_t total_tables = metadata.parsed_tables.size() - metadata.total_dictionaries; AtomicStopwatch watch; std::atomic dictionaries_processed{0}; @@ -101,18 +101,18 @@ void DatabaseOrdinary::loadStoredObjects( /// loading of its config only, it doesn't involve loading the dictionary itself. /// Attach dictionaries. 
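    /// (Dictionaries are attached first; ordinary tables follow in the symmetric loop below that checks !create_query.is_dictionary.)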
- for (const auto & name_with_path_and_query : metadata.metadata) + for (const auto & name_with_path_and_query : metadata.parsed_tables) { const auto & name = name_with_path_and_query.first; - const auto & path = name_with_path_and_query.second.first; - const auto & ast = name_with_path_and_query.second.second; + const auto & path = name_with_path_and_query.second.path; + const auto & ast = name_with_path_and_query.second.ast; const auto & create_query = ast->as(); if (create_query.is_dictionary) { pool.scheduleOrThrowOnError([&]() { - loadTableFromMetadata(local_context, path, name, ast, has_force_restore_data_flag); + loadTableFromMetadata(local_context, path, name, ast, force_restore); /// Messages, so that it's not boring to wait for the server to load for a long time. logAboutProgress(log, ++dictionaries_processed, metadata.total_dictionaries, watch); @@ -123,18 +123,18 @@ void DatabaseOrdinary::loadStoredObjects( pool.wait(); /// Attach tables. - for (const auto & name_with_path_and_query : metadata.metadata) + for (const auto & name_with_path_and_query : metadata.parsed_tables) { const auto & name = name_with_path_and_query.first; - const auto & path = name_with_path_and_query.second.first; - const auto & ast = name_with_path_and_query.second.second; + const auto & path = name_with_path_and_query.second.path; + const auto & ast = name_with_path_and_query.second.ast; const auto & create_query = ast->as(); if (!create_query.is_dictionary) { pool.scheduleOrThrowOnError([&]() { - loadTableFromMetadata(local_context, path, name, ast, has_force_restore_data_flag); + loadTableFromMetadata(local_context, path, name, ast, force_restore); /// Messages, so that it's not boring to wait for the server to load for a long time. logAboutProgress(log, ++tables_processed, total_tables, watch); @@ -147,13 +147,13 @@ void DatabaseOrdinary::loadStoredObjects( if (!skip_startup_tables) { /// After all tables was basically initialized, startup them. 
- startupTables(pool, has_force_restore_data_flag, force_attach); + startupTables(pool, force_restore, force_attach); } } void DatabaseOrdinary::loadTablesMetadata(ContextPtr local_context, ParsedTablesMetadata & metadata) { - size_t prev_tables_count = metadata.metadata.size(); + size_t prev_tables_count = metadata.parsed_tables.size(); size_t prev_total_dictionaries = metadata.total_dictionaries; auto process_metadata = [&metadata, this](const String & file_name) @@ -190,16 +190,16 @@ void DatabaseOrdinary::loadTablesMetadata(ContextPtr local_context, ParsedTables QualifiedTableName qualified_name{database_name, create_query->table}; std::lock_guard lock{metadata.mutex}; - metadata.metadata[qualified_name] = std::make_pair(full_path.string(), std::move(ast)); + metadata.parsed_tables[qualified_name] = ParsedTableMetadata{full_path.string(), ast}; if (data.dependencies.empty()) { - metadata.independent_tables.emplace_back(std::move(qualified_name)); + metadata.independent_database_objects.emplace_back(std::move(qualified_name)); } else { for (const auto & dependency : data.dependencies) { - metadata.dependencies_info[dependency].dependent_tables.push_back(qualified_name); + metadata.dependencies_info[dependency].dependent_database_objects.push_back(qualified_name); ++metadata.dependencies_info[qualified_name].dependencies_count; } } @@ -215,11 +215,12 @@ void DatabaseOrdinary::loadTablesMetadata(ContextPtr local_context, ParsedTables iterateMetadataFiles(local_context, process_metadata); - size_t objects_in_database = metadata.metadata.size() - prev_tables_count; + size_t objects_in_database = metadata.parsed_tables.size() - prev_tables_count; size_t dictionaries_in_database = metadata.total_dictionaries - prev_total_dictionaries; size_t tables_in_database = objects_in_database - dictionaries_in_database; - LOG_INFO(log, "Total {} tables and {} dictionaries.", tables_in_database, dictionaries_in_database); + LOG_INFO(log, "Metadata processed, database {} has {} tables and {} dictionaries in total.", + database_name, tables_in_database, dictionaries_in_database); } void DatabaseOrdinary::loadTableFromMetadata(ContextMutablePtr local_context, const String & file_path, const QualifiedTableName & name, const ASTPtr & ast, bool force_restore) @@ -261,6 +262,7 @@ void DatabaseOrdinary::startupTables(ThreadPool & thread_pool, bool /*force_rest } catch (...) { + /// We have to wait for jobs to finish here, because job function has reference to variables on the stack of current thread. 
thread_pool.wait(); throw; } diff --git a/src/Databases/DatabaseOrdinary.h b/src/Databases/DatabaseOrdinary.h index 3f300bfb3eb..5f6d9a30385 100644 --- a/src/Databases/DatabaseOrdinary.h +++ b/src/Databases/DatabaseOrdinary.h @@ -20,7 +20,7 @@ public: String getEngineName() const override { return "Ordinary"; } - void loadStoredObjects(ContextMutablePtr context, bool has_force_restore_data_flag, bool force_attach, bool skip_startup_tables) override; + void loadStoredObjects(ContextMutablePtr context, bool force_restore, bool force_attach, bool skip_startup_tables) override; bool supportsLoadingInTopologicalOrder() const override { return true; } diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp index 9aebc701aa9..c2ff002ea36 100644 --- a/src/Databases/DatabaseReplicated.cpp +++ b/src/Databases/DatabaseReplicated.cpp @@ -305,16 +305,16 @@ void DatabaseReplicated::createReplicaNodesInZooKeeper(const zkutil::ZooKeeperPt createEmptyLogEntry(current_zookeeper); } -void DatabaseReplicated::beforeLoadingMetadata(ContextMutablePtr /*context*/, bool /*has_force_restore_data_flag*/, bool force_attach) +void DatabaseReplicated::beforeLoadingMetadata(ContextMutablePtr /*context*/, bool /*force_restore*/, bool force_attach) { tryConnectToZooKeeperAndInitDatabase(force_attach); } void DatabaseReplicated::loadStoredObjects( - ContextMutablePtr local_context, bool has_force_restore_data_flag, bool force_attach, bool skip_startup_tables) + ContextMutablePtr local_context, bool force_restore, bool force_attach, bool skip_startup_tables) { - beforeLoadingMetadata(local_context, has_force_restore_data_flag, force_attach); - DatabaseAtomic::loadStoredObjects(local_context, has_force_restore_data_flag, force_attach, skip_startup_tables); + beforeLoadingMetadata(local_context, force_restore, force_attach); + DatabaseAtomic::loadStoredObjects(local_context, force_restore, force_attach, skip_startup_tables); } void DatabaseReplicated::startupTables(ThreadPool & thread_pool, bool force_restore, bool force_attach) diff --git a/src/Databases/DatabaseReplicated.h b/src/Databases/DatabaseReplicated.h index daba7dad17b..60526a1e5b0 100644 --- a/src/Databases/DatabaseReplicated.h +++ b/src/Databases/DatabaseReplicated.h @@ -57,9 +57,9 @@ public: void drop(ContextPtr /*context*/) override; - void loadStoredObjects(ContextMutablePtr context, bool has_force_restore_data_flag, bool force_attach, bool skip_startup_tables) override; + void loadStoredObjects(ContextMutablePtr context, bool force_restore, bool force_attach, bool skip_startup_tables) override; - void beforeLoadingMetadata(ContextMutablePtr context, bool has_force_restore_data_flag, bool force_attach) override; + void beforeLoadingMetadata(ContextMutablePtr context, bool force_restore, bool force_attach) override; void startupTables(ThreadPool & thread_pool, bool force_restore, bool force_attach) override; diff --git a/src/Databases/IDatabase.h b/src/Databases/IDatabase.h index fe17312cc0b..19279e545eb 100644 --- a/src/Databases/IDatabase.h +++ b/src/Databases/IDatabase.h @@ -129,7 +129,7 @@ public: /// You can call only once, right after the object is created. 
virtual void loadStoredObjects( ContextMutablePtr /*context*/, - bool /*has_force_restore_data_flag*/, + bool /*force_restore*/, bool /*force_attach*/ = false, bool /* skip_startup_tables */ = false) { @@ -139,7 +139,7 @@ public: virtual void beforeLoadingMetadata( ContextMutablePtr /*context*/, - bool /*has_force_restore_data_flag*/, + bool /*force_restore*/, bool /*force_attach*/) { } diff --git a/src/Databases/TablesLoader.cpp b/src/Databases/TablesLoader.cpp index 30a9bdd324e..48d751b5795 100644 --- a/src/Databases/TablesLoader.cpp +++ b/src/Databases/TablesLoader.cpp @@ -36,7 +36,7 @@ TablesLoader::TablesLoader(ContextMutablePtr global_context_, Databases database , force_restore(force_restore_) , force_attach(force_attach_) { - all_tables.default_database = global_context->getCurrentDatabase(); + metadata.default_database = global_context->getCurrentDatabase(); log = &Poco::Logger::get("TablesLoader"); } @@ -54,15 +54,19 @@ void TablesLoader::loadTables() database.second->loadStoredObjects(global_context, force_restore, force_attach, true); } + if (databases_to_load.empty()) + return; + /// Read and parse metadata from Ordinary, Atomic, Materialized*, Replicated, etc databases. Build dependency graph. for (auto & database_name : databases_to_load) { databases[database_name]->beforeLoadingMetadata(global_context, force_restore, force_attach); - databases[database_name]->loadTablesMetadata(global_context, all_tables); + databases[database_name]->loadTablesMetadata(global_context, metadata); } LOG_INFO(log, "Parsed metadata of {} tables in {} databases in {} sec", - all_tables.metadata.size(), databases_to_load.size(), stopwatch.elapsedSeconds()); + metadata.parsed_tables.size(), databases_to_load.size(), stopwatch.elapsedSeconds()); + stopwatch.restart(); logDependencyGraph(); @@ -86,13 +90,13 @@ void TablesLoader::removeUnresolvableDependencies() auto need_exclude_dependency = [this](const QualifiedTableName & dependency_name, const DependenciesInfo & info) { /// Table exists and will be loaded - if (all_tables.metadata.contains(dependency_name)) + if (metadata.parsed_tables.contains(dependency_name)) return false; /// Table exists and it's already loaded if (DatabaseCatalog::instance().isTableExist(StorageID(dependency_name.database, dependency_name.table), global_context)) return true; /// It's XML dictionary. It was loaded before tables and DDL dictionaries. - if (dependency_name.database == all_tables.default_database && + if (dependency_name.database == metadata.default_database && global_context->getExternalDictionariesLoader().has(dependency_name.table)) return true; @@ -100,24 +104,24 @@ void TablesLoader::removeUnresolvableDependencies() /// We will ignore it and try to load dependent tables without "dependency_name" /// (but most likely dependent tables will fail to load). LOG_WARNING(log, "Tables {} depend on {}, but seems like the it does not exist. Will ignore it and try to load existing tables", - fmt::join(info.dependent_tables, ", "), dependency_name); + fmt::join(info.dependent_database_objects, ", "), dependency_name); if (info.dependencies_count) throw Exception(ErrorCodes::LOGICAL_ERROR, "Table {} does not exist, but we have seen its AST and found {} dependencies." "It's a bug", dependency_name, info.dependencies_count); - if (info.dependent_tables.empty()) + if (info.dependent_database_objects.empty()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Table {} does not have dependencies and dependent tables as it expected to." 
"It's a bug", dependency_name); return true; }; - auto table_it = all_tables.dependencies_info.begin(); - while (table_it != all_tables.dependencies_info.end()) + auto table_it = metadata.dependencies_info.begin(); + while (table_it != metadata.dependencies_info.end()) { auto & info = table_it->second; if (need_exclude_dependency(table_it->first, info)) - table_it = removeResolvedDependency(table_it, all_tables.independent_tables); + table_it = removeResolvedDependency(table_it, metadata.independent_database_objects); else ++table_it; } @@ -125,79 +129,81 @@ void TablesLoader::removeUnresolvableDependencies() void TablesLoader::loadTablesInTopologicalOrder(ThreadPool & pool) { - /// While we have some independent tables to load, load them in parallel. - /// Then remove independent tables from graph and find new ones. + /// Load independent tables in parallel. + /// Then remove loaded tables from dependency graph, find tables/dictionaries that do not have unresolved dependencies anymore, + /// move them to the list of independent tables and load. + /// Repeat until we have some tables to load. + /// If we do not, then either all objects are loaded or there is cyclic dependency. + /// Complexity: O(V + E) size_t level = 0; do { - assert(all_tables.metadata.size() == tables_processed + all_tables.independent_tables.size() + getNumberOfTablesWithDependencies()); + assert(metadata.parsed_tables.size() == tables_processed + metadata.independent_database_objects.size() + getNumberOfTablesWithDependencies()); logDependencyGraph(); startLoadingIndependentTables(pool, level); - TableNames new_independent_tables; - for (const auto & table_name : all_tables.independent_tables) + TableNames new_independent_database_objects; + for (const auto & table_name : metadata.independent_database_objects) { - auto info_it = all_tables.dependencies_info.find(table_name); - if (info_it == all_tables.dependencies_info.end()) + auto info_it = metadata.dependencies_info.find(table_name); + if (info_it == metadata.dependencies_info.end()) { /// No tables depend on table_name and it was not even added to dependencies_info continue; } - removeResolvedDependency(info_it, new_independent_tables); + removeResolvedDependency(info_it, new_independent_database_objects); } pool.wait(); - all_tables.independent_tables = std::move(new_independent_tables); + metadata.independent_database_objects = std::move(new_independent_database_objects); ++level; - } while (!all_tables.independent_tables.empty()); + } while (!metadata.independent_database_objects.empty()); checkCyclicDependencies(); } -DependenciesInfosIter TablesLoader::removeResolvedDependency(const DependenciesInfosIter & info_it, TableNames & independent_tables) +DependenciesInfosIter TablesLoader::removeResolvedDependency(const DependenciesInfosIter & info_it, TableNames & independent_database_objects) { auto & info = info_it->second; if (info.dependencies_count) throw Exception(ErrorCodes::LOGICAL_ERROR, "Table {} is in list of independent tables, but dependencies count is {}." "It's a bug", info_it->first, info.dependencies_count); - if (info.dependent_tables.empty()) + if (info.dependent_database_objects.empty()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Table {} does not have dependent tables. 
It's a bug", info_it->first); /// Decrement number of dependencies for each dependent table - for (auto & dependent_table : info.dependent_tables) + for (auto & dependent_table : info.dependent_database_objects) { - auto & dependent_info = all_tables.dependencies_info[dependent_table]; + auto & dependent_info = metadata.dependencies_info[dependent_table]; auto & dependencies_count = dependent_info.dependencies_count; if (dependencies_count == 0) throw Exception(ErrorCodes::LOGICAL_ERROR, "Trying to decrement 0 dependencies counter for {}. It's a bug", dependent_table); --dependencies_count; if (dependencies_count == 0) { - independent_tables.push_back(dependent_table); - if (dependent_info.dependent_tables.empty()) - all_tables.dependencies_info.erase(dependent_table); + independent_database_objects.push_back(dependent_table); + if (dependent_info.dependent_database_objects.empty()) + metadata.dependencies_info.erase(dependent_table); } } - return all_tables.dependencies_info.erase(info_it); + return metadata.dependencies_info.erase(info_it); } void TablesLoader::startLoadingIndependentTables(ThreadPool & pool, size_t level) { - size_t total_tables = all_tables.metadata.size(); + size_t total_tables = metadata.parsed_tables.size(); - LOG_INFO(log, "Loading {} tables with {} dependency level", all_tables.independent_tables.size(), level); + LOG_INFO(log, "Loading {} tables with {} dependency level", metadata.independent_database_objects.size(), level); - for (const auto & table_name : all_tables.independent_tables) + for (const auto & table_name : metadata.independent_database_objects) { pool.scheduleOrThrowOnError([this, total_tables, &table_name]() { - const auto & path_and_query = all_tables.metadata[table_name]; - const auto & path = path_and_query.first; - const auto & ast = path_and_query.second; - databases[table_name.database]->loadTableFromMetadata(global_context, path, table_name, ast, force_restore); + const auto & path_and_query = metadata.parsed_tables[table_name]; + databases[table_name.database]->loadTableFromMetadata(global_context, path_and_query.path, table_name, path_and_query.ast, force_restore); logAboutProgress(log, ++tables_processed, total_tables, stopwatch); }); } @@ -206,7 +212,7 @@ void TablesLoader::startLoadingIndependentTables(ThreadPool & pool, size_t level size_t TablesLoader::getNumberOfTablesWithDependencies() const { size_t number_of_tables_with_dependencies = 0; - for (const auto & info : all_tables.dependencies_info) + for (const auto & info : metadata.dependencies_info) if (info.second.dependencies_count) ++number_of_tables_with_dependencies; return number_of_tables_with_dependencies; @@ -215,32 +221,34 @@ size_t TablesLoader::getNumberOfTablesWithDependencies() const void TablesLoader::checkCyclicDependencies() const { /// Loading is finished if all dependencies are resolved - if (all_tables.dependencies_info.empty()) + if (metadata.dependencies_info.empty()) return; - for (const auto & info : all_tables.dependencies_info) + for (const auto & info : metadata.dependencies_info) { LOG_WARNING(log, "Cannot resolve dependencies: Table {} have {} dependencies and {} dependent tables. 
List of dependent tables: {}",
                     info.first, info.second.dependencies_count,
-                    info.second.dependent_tables.size(), fmt::join(info.second.dependent_tables, ", "));
+                    info.second.dependent_database_objects.size(), fmt::join(info.second.dependent_database_objects, ", "));
         assert(info.second.dependencies_count == 0);
     }

     throw Exception(ErrorCodes::INFINITE_LOOP, "Cannot attach {} tables due to cyclic dependencies. "
-                    "See server log for details.", all_tables.dependencies_info.size());
+                    "See server log for details.", metadata.dependencies_info.size());
 }

 void TablesLoader::logDependencyGraph() const
 {
-    LOG_TRACE(log, "Have {} independent tables: {}", all_tables.independent_tables.size(), fmt::join(all_tables.independent_tables, ", "));
-    for (const auto & dependencies : all_tables.dependencies_info)
+    LOG_TEST(log, "Have {} independent tables: {}",
+        metadata.independent_database_objects.size(),
+        fmt::join(metadata.independent_database_objects, ", "));
+    for (const auto & dependencies : metadata.dependencies_info)
     {
-        LOG_TRACE(log,
+        LOG_TEST(log,
             "Table {} has {} dependencies and {} dependent tables. List of dependent tables: {}",
             dependencies.first,
             dependencies.second.dependencies_count,
-            dependencies.second.dependent_tables.size(),
-            fmt::join(dependencies.second.dependent_tables, ", "));
+            dependencies.second.dependent_database_objects.size(),
+            fmt::join(dependencies.second.dependent_database_objects, ", "));
     }
 }
diff --git a/src/Databases/TablesLoader.h b/src/Databases/TablesLoader.h
index 35dae8a5ad6..12f6c2e86a5 100644
--- a/src/Databases/TablesLoader.h
+++ b/src/Databases/TablesLoader.h
@@ -26,15 +26,21 @@ void logAboutProgress(Poco::Logger * log, size_t processed, size_t total, Atomic
 class IDatabase;
 using DatabasePtr = std::shared_ptr<IDatabase>;

-using ParsedMetadata = std::map<QualifiedTableName, std::pair<String, ASTPtr>>;
+struct ParsedTableMetadata
+{
+    String path;
+    ASTPtr ast;
+};
+
+using ParsedMetadata = std::map<QualifiedTableName, ParsedTableMetadata>;
 using TableNames = std::vector<QualifiedTableName>;

 struct DependenciesInfo
 {
     /// How many dependencies this table has
     size_t dependencies_count = 0;
-    /// List of tables which depend on this table
-    TableNames dependent_tables;
+    /// List of tables/dictionaries which depend on this table/dictionary
+    TableNames dependent_database_objects;
 };

 using DependenciesInfos = std::unordered_map<QualifiedTableName, DependenciesInfo>;

@@ -45,18 +51,18 @@ struct ParsedTablesMetadata
 {
     String default_database;

     std::mutex mutex;
-    ParsedMetadata metadata;
+    ParsedMetadata parsed_tables;

     /// For logging
     size_t total_dictionaries = 0;

-    /// List of tables that do not have any dependencies and can be loaded
-    TableNames independent_tables;
+    /// List of tables/dictionaries that do not have any dependencies and can be loaded
+    TableNames independent_database_objects;

     /// Actually it contains two different maps (with, probably, intersecting keys):
-    /// 1. table name -> number of dependencies
-    /// 2. table name -> dependent tables list (adjacency list of dependencies graph).
-    /// If table A depends on table B, then there is an edge B --> A, i.e. dependencies_info[B].dependent_tables contains A.
+    /// 1. table/dictionary name -> number of dependencies
+    /// 2. table/dictionary name -> dependent tables/dictionaries list (adjacency list of dependencies graph).
+    /// If table A depends on table B, then there is an edge B --> A, i.e. dependencies_info[B].dependent_database_objects contains A.
     /// And dependencies_info[C].dependencies_count is the number of incoming edges for vertex C (how many tables we have to load before C).
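The adjacency-list comments above describe a level-wise variant of Kahn's algorithm: load everything with dependencies_count == 0, decrement the counters of its dependents, and promote dependents that reach zero to the next level. A minimal, self-contained sketch of that loop; the names and types here are simplified stand-ins, not the real TablesLoader API, and the real implementation loads each level on a thread pool:

#include <cassert>
#include <iostream>
#include <string>
#include <unordered_map>
#include <vector>

using Name = std::string;

struct Info
{
    size_t dependencies_count = 0;   /// number of incoming edges (unresolved dependencies)
    std::vector<Name> dependents;    /// adjacency list: edge B --> A means A depends on B
};

int main()
{
    /// "dict" depends on "src"; "view" depends on "dict".
    std::unordered_map<Name, Info> info;
    info["src"].dependents = {"dict"};
    info["dict"].dependencies_count = 1;
    info["dict"].dependents = {"view"};
    info["view"].dependencies_count = 1;

    std::vector<Name> independent = {"src"};   /// nothing left to wait for
    size_t level = 0;
    size_t loaded = 0;

    while (!independent.empty())
    {
        std::vector<Name> next;
        for (const auto & name : independent)
        {
            std::cout << "level " << level << ": load " << name << '\n';
            ++loaded;
            /// Resolving `name` may free its dependents for the next level.
            for (const auto & dependent : info[name].dependents)
                if (--info[dependent].dependencies_count == 0)
                    next.push_back(dependent);
        }
        independent = std::move(next);
        ++level;
    }

    /// If anything still has dependencies_count != 0 here, there is a cycle.
    assert(loaded == info.size());
}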
DependenciesInfos dependencies_info; }; @@ -81,7 +87,7 @@ private: bool force_attach; Strings databases_to_load; - ParsedTablesMetadata all_tables; + ParsedTablesMetadata metadata; Poco::Logger * log; std::atomic tables_processed{0}; AtomicStopwatch stopwatch; @@ -92,7 +98,7 @@ private: void loadTablesInTopologicalOrder(ThreadPool & pool); - DependenciesInfosIter removeResolvedDependency(const DependenciesInfosIter & info_it, TableNames & independent_tables); + DependenciesInfosIter removeResolvedDependency(const DependenciesInfosIter & info_it, TableNames & independent_database_objects); void startLoadingIndependentTables(ThreadPool & pool, size_t level); diff --git a/src/Dictionaries/PostgreSQLDictionarySource.cpp b/src/Dictionaries/PostgreSQLDictionarySource.cpp index 3fe9e899cd9..50be5592918 100644 --- a/src/Dictionaries/PostgreSQLDictionarySource.cpp +++ b/src/Dictionaries/PostgreSQLDictionarySource.cpp @@ -1,6 +1,7 @@ #include "PostgreSQLDictionarySource.h" #include +#include #include "DictionarySourceFactory.h" #include "registerDictionaries.h" @@ -29,19 +30,13 @@ namespace { ExternalQueryBuilder makeExternalQueryBuilder(const DictionaryStructure & dict_struct, const String & schema, const String & table, const String & query, const String & where) { - auto schema_value = schema; - auto table_value = table; + QualifiedTableName qualified_name{schema, table}; + + if (qualified_name.database.empty()) + qualified_name = QualifiedTableName::parseFromString(qualified_name.table); - if (schema_value.empty()) - { - if (auto pos = table_value.find('.'); pos != std::string::npos) - { - schema_value = table_value.substr(0, pos); - table_value = table_value.substr(pos + 1); - } - } /// Do not need db because it is already in a connection string. - return {dict_struct, "", schema_value, table_value, query, where, IdentifierQuotingStyle::DoubleQuotes}; + return {dict_struct, "", qualified_name.database, qualified_name.table, query, where, IdentifierQuotingStyle::DoubleQuotes}; } } diff --git a/src/Dictionaries/XDBCDictionarySource.cpp b/src/Dictionaries/XDBCDictionarySource.cpp index 9fc7e92634b..bf7526580c0 100644 --- a/src/Dictionaries/XDBCDictionarySource.cpp +++ b/src/Dictionaries/XDBCDictionarySource.cpp @@ -38,29 +38,22 @@ namespace const std::string & where_, IXDBCBridgeHelper & bridge_) { - std::string schema = schema_; - std::string table = table_; + QualifiedTableName qualified_name{schema_, table_}; if (bridge_.isSchemaAllowed()) { - if (schema.empty()) - { - if (auto pos = table.find('.'); pos != std::string::npos) - { - schema = table.substr(0, pos); - table = table.substr(pos + 1); - } - } + if (qualified_name.database.empty()) + qualified_name = QualifiedTableName::parseFromString(qualified_name.table); } else { - if (!schema.empty()) + if (!qualified_name.database.empty()) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Dictionary source of type {} specifies a schema but schema is not supported by {}-driver", bridge_.getName()); } - return {dict_struct_, db_, schema, table, query_, where_, bridge_.getIdentifierQuotingStyle()}; + return {dict_struct_, db_, qualified_name.database, qualified_name.table, query_, where_, bridge_.getIdentifierQuotingStyle()}; } } diff --git a/src/Dictionaries/getDictionaryConfigurationFromAST.cpp b/src/Dictionaries/getDictionaryConfigurationFromAST.cpp index c77ac36ade6..0ed5b3af83d 100644 --- a/src/Dictionaries/getDictionaryConfigurationFromAST.cpp +++ b/src/Dictionaries/getDictionaryConfigurationFromAST.cpp @@ -4,7 +4,6 @@ #include #include 
#include -#include #include #include #include @@ -16,6 +15,8 @@ #include #include #include +#include +#include namespace DB @@ -576,4 +577,28 @@ getDictionaryConfigurationFromAST(const ASTCreateQuery & query, ContextPtr conte return conf; } +std::optional +getInfoIfClickHouseDictionarySource(DictionaryConfigurationPtr & config, ContextPtr global_context) +{ + ClickHouseDictionarySourceInfo info; + + String host = config->getString("dictionary.source.clickhouse.host", ""); + UInt16 port = config->getUInt("dictionary.source.clickhouse.port", 0); + String database = config->getString("dictionary.source.clickhouse.db", ""); + String table = config->getString("dictionary.source.clickhouse.table", ""); + bool secure = config->getBool("dictionary.source.clickhouse.secure", false); + + if (host.empty() || port == 0 || table.empty()) + return {}; + + info.table_name = {database, table}; + + UInt16 default_port = secure ? global_context->getTCPPortSecure().value_or(0) : global_context->getTCPPort(); + if (!isLocalAddress({host, port}, default_port)) + return info; + + info.is_local = true; + return info; +} + } diff --git a/src/Dictionaries/getDictionaryConfigurationFromAST.h b/src/Dictionaries/getDictionaryConfigurationFromAST.h index b464fdf1d8c..ec44b9815ff 100644 --- a/src/Dictionaries/getDictionaryConfigurationFromAST.h +++ b/src/Dictionaries/getDictionaryConfigurationFromAST.h @@ -15,4 +15,13 @@ using DictionaryConfigurationPtr = Poco::AutoPtr +getInfoIfClickHouseDictionarySource(DictionaryConfigurationPtr & config, ContextPtr global_context); + } diff --git a/src/Functions/FunctionJoinGet.cpp b/src/Functions/FunctionJoinGet.cpp index ee173607437..f0dff0ac7e4 100644 --- a/src/Functions/FunctionJoinGet.cpp +++ b/src/Functions/FunctionJoinGet.cpp @@ -48,22 +48,11 @@ getJoin(const ColumnsWithTypeAndName & arguments, ContextPtr context) "Illegal type " + arguments[0].type->getName() + " of first argument of function joinGet, expected a const string.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - size_t dot = join_name.find('.'); - String database_name; - if (dot == String::npos) - { - database_name = context->getCurrentDatabase(); - dot = 0; - } - else - { - database_name = join_name.substr(0, dot); - ++dot; - } - String table_name = join_name.substr(dot); - if (table_name.empty()) - throw Exception("joinGet does not allow empty table name", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - auto table = DatabaseCatalog::instance().getTable({database_name, table_name}, std::const_pointer_cast(context)); + auto qualified_name = QualifiedTableName::parseFromString(join_name); + if (qualified_name.database.empty()) + qualified_name.database = context->getCurrentDatabase(); + + auto table = DatabaseCatalog::instance().getTable({qualified_name.database, qualified_name.table}, std::const_pointer_cast(context)); auto storage_join = std::dynamic_pointer_cast(table); if (!storage_join) throw Exception("Table " + join_name + " should have engine StorageJoin", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); diff --git a/src/Interpreters/ExternalDictionariesLoader.cpp b/src/Interpreters/ExternalDictionariesLoader.cpp index cbb0e52b91b..fdd371c5038 100644 --- a/src/Interpreters/ExternalDictionariesLoader.cpp +++ b/src/Interpreters/ExternalDictionariesLoader.cpp @@ -89,47 +89,40 @@ DictionaryStructure ExternalDictionariesLoader::getDictionaryStructure(const std std::string ExternalDictionariesLoader::resolveDictionaryName(const std::string & dictionary_name, const std::string & current_database_name) const { - bool has_dictionary = 
has(dictionary_name);
-    if (has_dictionary)
+    if (has(dictionary_name))
         return dictionary_name;

-    std::string resolved_name = resolveDictionaryNameFromDatabaseCatalog(dictionary_name);
-    has_dictionary = has(resolved_name);
+    std::string resolved_name = resolveDictionaryNameFromDatabaseCatalog(dictionary_name, current_database_name);

-    if (!has_dictionary)
-    {
-        /// If dictionary not found. And database was not implicitly specified
-        /// we can qualify dictionary name with current database name.
-        /// It will help if dictionary is created with DDL and is in current database.
-        if (dictionary_name.find('.') == std::string::npos)
-        {
-            String dictionary_name_with_database = current_database_name + '.' + dictionary_name;
-            resolved_name = resolveDictionaryNameFromDatabaseCatalog(dictionary_name_with_database);
-            has_dictionary = has(resolved_name);
-        }
-    }
+    if (has(resolved_name))
+        return resolved_name;

-    if (!has_dictionary)
-        throw Exception(ErrorCodes::BAD_ARGUMENTS, "Dictionary ({}) not found", backQuote(dictionary_name));
-
-    return resolved_name;
+    throw Exception(ErrorCodes::BAD_ARGUMENTS, "Dictionary ({}) not found", backQuote(dictionary_name));
 }

-std::string ExternalDictionariesLoader::resolveDictionaryNameFromDatabaseCatalog(const std::string & name) const
+std::string ExternalDictionariesLoader::resolveDictionaryNameFromDatabaseCatalog(const std::string & name, const std::string & current_database_name) const
 {
     /// If it's a dictionary from an Atomic database, then we need to convert the qualified name to a UUID.
     /// Try to split name and get id from associated StorageDictionary.
     /// If something went wrong, return name as is.

-    auto pos = name.find('.');
-    if (pos == std::string::npos || name.find('.', pos + 1) != std::string::npos)
+    auto qualified_name = QualifiedTableName::tryParseFromString(name);
+    if (!qualified_name)
         return name;

-    std::string maybe_database_name = name.substr(0, pos);
-    std::string maybe_table_name = name.substr(pos + 1);
+    if (qualified_name->database.empty())
+    {
+        /// Either the database name is not specified and we should use the current one,
+        /// or it's an XML dictionary.
+        bool is_xml_dictionary = has(name);
+        if (is_xml_dictionary)
+            return name;
+        else
+            qualified_name->database = current_database_name;
+    }

     auto [db, table] = DatabaseCatalog::instance().tryGetDatabaseAndTable(
-        {maybe_database_name, maybe_table_name},
+        {qualified_name->database, qualified_name->table},
         const_pointer_cast<Context>(getContext()));

     if (!db)
diff --git a/src/Interpreters/ExternalDictionariesLoader.h b/src/Interpreters/ExternalDictionariesLoader.h
index 06f64ef30c5..f748d75d908 100644
--- a/src/Interpreters/ExternalDictionariesLoader.h
+++ b/src/Interpreters/ExternalDictionariesLoader.h
@@ -42,7 +42,7 @@ protected:
     std::string resolveDictionaryName(const std::string & dictionary_name, const std::string & current_database_name) const;

     /// Try to convert a qualified dictionary name to a persistent UUID
-    std::string resolveDictionaryNameFromDatabaseCatalog(const std::string & name) const;
+    std::string resolveDictionaryNameFromDatabaseCatalog(const std::string & name, const std::string & current_database_name) const;

     friend class StorageSystemDictionaries;
     friend class DatabaseDictionary;
diff --git a/src/Interpreters/loadMetadata.h b/src/Interpreters/loadMetadata.h
index e44c84ff2ba..e918b5f530c 100644
--- a/src/Interpreters/loadMetadata.h
+++ b/src/Interpreters/loadMetadata.h
@@ -15,6 +15,8 @@ void loadMetadataSystem(ContextMutablePtr context);

 /// Use separate function to load system tables.
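Several hunks in this series, including resolveDictionaryNameFromDatabaseCatalog above, replace hand-rolled find('.') splitting with QualifiedTableName::parseFromString / tryParseFromString. A self-contained approximation of the parsing rules as they can be inferred from these diffs (no dot gives a bare table name, one dot splits database from table, more than one dot is rejected); the struct below is an illustrative stand-in, not the real src/Core/QualifiedTableName.h:

#include <cassert>
#include <optional>
#include <string>

struct QualifiedName { std::string database; std::string table; };

/// Mimics the behaviour visible in the diffs above and in the PATCH 27 fix.
std::optional<QualifiedName> tryParse(const std::string & name)
{
    auto pos = name.find('.');
    if (pos == std::string::npos)
        return QualifiedName{"", name};                 /// bare table name
    if (name.find('.', pos + 1) != std::string::npos)
        return std::nullopt;                            /// multiple dots are not allowed
    return QualifiedName{name.substr(0, pos), name.substr(pos + 1)};
}

int main()
{
    assert(tryParse("t")->database.empty());
    assert(tryParse("db.t")->database == "db");
    assert(!tryParse("a.b.c"));
}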
void loadMetadata(ContextMutablePtr context, const String & default_database_name = {});

+/// Background operations in system tables may slow down loading of the rest of the tables,
+/// so we start up system tables after all databases are loaded.
 void startupSystemTables();

 }
diff --git a/src/TableFunctions/TableFunctionRemote.cpp b/src/TableFunctions/TableFunctionRemote.cpp
index 08f61a49fa5..62a0978d42f 100644
--- a/src/TableFunctions/TableFunctionRemote.cpp
+++ b/src/TableFunctions/TableFunctionRemote.cpp
@@ -93,14 +93,8 @@ void TableFunctionRemote::parseArguments(const ASTPtr & ast_function, ContextPtr

             ++arg_num;

-            size_t dot = remote_database.find('.');
-            if (dot != String::npos)
-            {
-                /// NOTE Bad - do not support identifiers in backquotes.
-                remote_table = remote_database.substr(dot + 1);
-                remote_database = remote_database.substr(0, dot);
-            }
-            else
+            auto qualified_name = QualifiedTableName::parseFromString(remote_database);
+            if (qualified_name.database.empty())
             {
                 if (arg_num >= args.size())
                 {
@@ -108,6 +102,7 @@ void TableFunctionRemote::parseArguments(const ASTPtr & ast_function, ContextPtr
                 }
                 else
                 {
+                    std::swap(qualified_name.database, qualified_name.table);
                     args[arg_num] = evaluateConstantExpressionOrIdentifierAsLiteral(args[arg_num], context);
                     remote_table = args[arg_num]->as<ASTLiteral &>().value.safeGet<String>();
                     ++arg_num;

From 2ec9b6fe3be4bacce182cfa73df015e1dda14d9a Mon Sep 17 00:00:00 2001
From: Alexander Tokmakov
Date: Mon, 13 Sep 2021 22:36:55 +0300
Subject: [PATCH 27/80] fix

---
 src/Core/QualifiedTableName.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Core/QualifiedTableName.h b/src/Core/QualifiedTableName.h
index dd043b86ee1..c1cb9b27d15 100644
--- a/src/Core/QualifiedTableName.h
+++ b/src/Core/QualifiedTableName.h
@@ -58,7 +58,7 @@ struct QualifiedTableName
         {
             name.table = std::move(maybe_qualified_name);
         }
-        else if (maybe_qualified_name.find('.', pos + 1))
+        else if (maybe_qualified_name.find('.', pos + 1) != std::string::npos)
         {
             /// Do not allow multiple dots
             return {};

From f5c38fe027cb12a0147a30c0b2e46247d6401272 Mon Sep 17 00:00:00 2001
From: Alexander Tokmakov
Date: Tue, 14 Sep 2021 00:39:50 +0300
Subject: [PATCH 28/80] fix

---
 src/Backups/renameInCreateQuery.cpp | 2 +-
 src/TableFunctions/TableFunctionRemote.cpp | 5 ++++-
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/src/Backups/renameInCreateQuery.cpp b/src/Backups/renameInCreateQuery.cpp
index 5d99ea585b5..4c78844d266 100644
--- a/src/Backups/renameInCreateQuery.cpp
+++ b/src/Backups/renameInCreateQuery.cpp
@@ -171,7 +171,7 @@ namespace
         else
             qualified_name = QualifiedTableName::parseFromString(name);

-        if(qualified_name.database.empty())
+        if (qualified_name.database.empty())
         {
             std::swap(qualified_name.database, qualified_name.table);
             table_name_index = 2;
diff --git a/src/TableFunctions/TableFunctionRemote.cpp b/src/TableFunctions/TableFunctionRemote.cpp
index 62a0978d42f..3c39e3f2ec0 100644
--- a/src/TableFunctions/TableFunctionRemote.cpp
+++ b/src/TableFunctions/TableFunctionRemote.cpp
@@ -104,10 +104,13 @@ void TableFunctionRemote::parseArguments(const ASTPtr & ast_function, ContextPtr
                 {
                     std::swap(qualified_name.database, qualified_name.table);
                     args[arg_num] = evaluateConstantExpressionOrIdentifierAsLiteral(args[arg_num], context);
-                    remote_table = args[arg_num]->as<ASTLiteral &>().value.safeGet<String>();
+                    qualified_name.table = args[arg_num]->as<ASTLiteral &>().value.safeGet<String>();
                     ++arg_num;
                 }
             }
+
+            remote_database = std::move(qualified_name.database);
+            remote_table = std::move(qualified_name.table);
         }

         /// Cluster function
may have sharding key for insert From f03484e0dcda7edb65b073f7d549021cb01fef46 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Tue, 14 Sep 2021 11:46:40 +0300 Subject: [PATCH 29/80] fix test --- .../0_stateless/01372_remote_table_function_empty_table.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01372_remote_table_function_empty_table.sql b/tests/queries/0_stateless/01372_remote_table_function_empty_table.sql index 4153dc632f3..55c9d3f63d3 100644 --- a/tests/queries/0_stateless/01372_remote_table_function_empty_table.sql +++ b/tests/queries/0_stateless/01372_remote_table_function_empty_table.sql @@ -1,4 +1,4 @@ -SELECT * FROM remote('127..2', 'a.'); -- { serverError 36 } +SELECT * FROM remote('127..2', 'a.'); -- { serverError 62 } -- Clear cache to avoid future errors in the logs SYSTEM DROP DNS CACHE From bbd7799375ba4749a80bbd6ebe11d16033f2af54 Mon Sep 17 00:00:00 2001 From: hexiaoting Date: Tue, 14 Sep 2021 18:51:13 +0800 Subject: [PATCH 30/80] fuse multi setting options into one when Optimize --- src/Interpreters/TreeOptimizer.cpp | 129 +++++++++++++++++- src/Interpreters/TreeRewriter.cpp | 118 ---------------- .../01744_fuse_sum_count_aggregate.sql | 2 +- .../01956_fuse_quantile_optimization.sql | 2 +- 4 files changed, 128 insertions(+), 123 deletions(-) diff --git a/src/Interpreters/TreeOptimizer.cpp b/src/Interpreters/TreeOptimizer.cpp index fb220cddc02..4d7e752aa7a 100644 --- a/src/Interpreters/TreeOptimizer.cpp +++ b/src/Interpreters/TreeOptimizer.cpp @@ -588,10 +588,118 @@ void optimizeFunctionsToSubcolumns(ASTPtr & query, const StorageMetadataPtr & me RewriteFunctionToSubcolumnVisitor(data).visit(query); } +struct FuseSumCountAggregates +{ + std::vector sums {}; + std::vector counts {}; + std::vector avgs {}; + + void addFuncNode(ASTFunction * func) + { + if (func->name == "sum") + sums.push_back(func); + else if (func->name == "count") + counts.push_back(func); + else + { + assert(func->name == "avg"); + avgs.push_back(func); + } + } + + bool canBeFused() const + { + // Need at least two different kinds of functions to fuse. + if (sums.empty() && counts.empty()) + return false; + if (sums.empty() && avgs.empty()) + return false; + if (counts.empty() && avgs.empty()) + return false; + return true; + } +}; + +struct FuseSumCountAggregatesVisitorData +{ + using TypeToVisit = ASTFunction; + + std::unordered_map fuse_map; + + void visit(ASTFunction & func, ASTPtr &) + { + if (func.name == "sum" || func.name == "avg" || func.name == "count") + { + if (func.arguments->children.empty()) + return; + + // Probably we can extend it to match count() for non-nullable argument + // to sum/avg with any other argument. Now we require strict match. + const auto argument = func.arguments->children.at(0)->getColumnName(); + auto it = fuse_map.find(argument); + if (it != fuse_map.end()) + { + it->second.addFuncNode(&func); + } + else + { + FuseSumCountAggregates funcs{}; + funcs.addFuncNode(&func); + fuse_map[argument] = funcs; + } + } + } +}; + +using FuseSumCountAggregatesVisitor = InDepthNodeVisitor, true>; + +// Replaces one avg/sum/count function with an appropriate expression with +// sumCount(). +void replaceWithSumCount(String column_name, ASTFunction & func) +{ + auto func_base = makeASTFunction("sumCount", std::make_shared(column_name)); + auto exp_list = std::make_shared(); + if (func.name == "sum" || func.name == "count") + { + /// Rewrite "sum" to sumCount().1, rewrite "count" to sumCount().2 + UInt8 idx = (func.name == "sum" ? 
1 : 2); + func.name = "tupleElement"; + exp_list->children.push_back(func_base); + exp_list->children.push_back(std::make_shared(idx)); + } + else + { + /// Rewrite "avg" to sumCount().1 / sumCount().2 + auto new_arg1 = makeASTFunction("tupleElement", func_base, std::make_shared(UInt8(1))); + auto new_arg2 = makeASTFunction("tupleElement", func_base, std::make_shared(UInt8(2))); + func.name = "divide"; + exp_list->children.push_back(new_arg1); + exp_list->children.push_back(new_arg2); + } + func.arguments = exp_list; + func.children.push_back(func.arguments); +} + +void fuseSumCountAggregates(std::unordered_map & fuse_map) +{ + for (auto & it : fuse_map) + { + if (it.second.canBeFused()) + { + for (auto & func: it.second.sums) + replaceWithSumCount(it.first, *func); + for (auto & func: it.second.avgs) + replaceWithSumCount(it.first, *func); + for (auto & func: it.second.counts) + replaceWithSumCount(it.first, *func); + } + } +} + /// Rewrites multi quantile()() functions with the same arguments to quantiles()()[] /// eg:SELECT quantile(0.5)(x), quantile(0.9)(x), quantile(0.95)(x) FROM... /// rewrite to : SELECT quantiles(0.5, 0.9, 0.95)(x)[1], quantiles(0.5, 0.9, 0.95)(x)[2], quantiles(0.5, 0.9, 0.95)(x)[3] FROM ... -void fuseCandidate(std::unordered_map & fuse_quantile) +void fuseQuantileCandidate(std::unordered_map & fuse_quantile) { for (const auto & candidate : fuse_quantile) { @@ -627,13 +735,21 @@ void fuseCandidate(std::unordered_map sums {}; - std::vector counts {}; - std::vector avgs {}; - - void addFuncNode(ASTFunction * func) - { - if (func->name == "sum") - sums.push_back(func); - else if (func->name == "count") - counts.push_back(func); - else - { - assert(func->name == "avg"); - avgs.push_back(func); - } - } - - bool canBeFused() const - { - // Need at least two different kinds of functions to fuse. - if (sums.empty() && counts.empty()) - return false; - if (sums.empty() && avgs.empty()) - return false; - if (counts.empty() && avgs.empty()) - return false; - return true; - } -}; - -struct FuseSumCountAggregatesVisitorData -{ - using TypeToVisit = ASTFunction; - - std::unordered_map fuse_map; - - void visit(ASTFunction & func, ASTPtr &) - { - if (func.name == "sum" || func.name == "avg" || func.name == "count") - { - if (func.arguments->children.empty()) - return; - - // Probably we can extend it to match count() for non-nullable argument - // to sum/avg with any other argument. Now we require strict match. - const auto argument = func.arguments->children.at(0)->getColumnName(); - auto it = fuse_map.find(argument); - if (it != fuse_map.end()) - { - it->second.addFuncNode(&func); - } - else - { - FuseSumCountAggregates funcs{}; - funcs.addFuncNode(&func); - fuse_map[argument] = funcs; - } - } - } -}; - using CustomizeAggregateFunctionsOrNullVisitor = InDepthNodeVisitor, true>; using CustomizeAggregateFunctionsMoveOrNullVisitor = InDepthNodeVisitor, true>; -using FuseSumCountAggregatesVisitor = InDepthNodeVisitor, true>; /// Translate qualified names such as db.table.column, table.column, table_alias.column to names' normal form. /// Expand asterisks and qualified asterisks with column names. @@ -263,49 +199,6 @@ void translateQualifiedNames(ASTPtr & query, const ASTSelectQuery & select_query throw Exception("Empty list of columns in SELECT query", ErrorCodes::EMPTY_LIST_OF_COLUMNS_QUERIED); } -// Replaces one avg/sum/count function with an appropriate expression with -// sumCount(). 
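The comment and function being moved here are the heart of the sum/count/avg fusion: the rewriter turns sum(x) into sumCount(x).1, count(x) into sumCount(x).2 and avg(x) into their quotient, so after common-subexpression elimination a single shared aggregate state feeds all three. A toy value-level equivalent of that sharing; the real rewrite manipulates ASTs, not values:

#include <cassert>
#include <utility>
#include <vector>

/// One pass over the data produces both aggregates, like sumCount() does.
std::pair<long long, size_t> sumCount(const std::vector<int> & xs)
{
    std::pair<long long, size_t> r{0, 0};
    for (int x : xs)
    {
        r.first += x;
        ++r.second;
    }
    return r;
}

int main()
{
    std::vector<int> xs{1, 2, 3, 4};
    auto sc = sumCount(xs);
    long long sum = sc.first;                        /// sum(x)   -> sumCount(x).1
    size_t count = sc.second;                        /// count(x) -> sumCount(x).2
    double avg = double(sc.first) / sc.second;       /// avg(x)   -> sumCount(x).1 / sumCount(x).2
    assert(sum == 10 && count == 4 && avg == 2.5);
}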
-void replaceWithSumCount(String column_name, ASTFunction & func) -{ - auto func_base = makeASTFunction("sumCount", std::make_shared(column_name)); - auto exp_list = std::make_shared(); - if (func.name == "sum" || func.name == "count") - { - /// Rewrite "sum" to sumCount().1, rewrite "count" to sumCount().2 - UInt8 idx = (func.name == "sum" ? 1 : 2); - func.name = "tupleElement"; - exp_list->children.push_back(func_base); - exp_list->children.push_back(std::make_shared(idx)); - } - else - { - /// Rewrite "avg" to sumCount().1 / sumCount().2 - auto new_arg1 = makeASTFunction("tupleElement", func_base, std::make_shared(UInt8(1))); - auto new_arg2 = makeASTFunction("tupleElement", func_base, std::make_shared(UInt8(2))); - func.name = "divide"; - exp_list->children.push_back(new_arg1); - exp_list->children.push_back(new_arg2); - } - func.arguments = exp_list; - func.children.push_back(func.arguments); -} - -void fuseSumCountAggregates(std::unordered_map & fuse_map) -{ - for (auto & it : fuse_map) - { - if (it.second.canBeFused()) - { - for (auto & func: it.second.sums) - replaceWithSumCount(it.first, *func); - for (auto & func: it.second.avgs) - replaceWithSumCount(it.first, *func); - for (auto & func: it.second.counts) - replaceWithSumCount(it.first, *func); - } - } -} - bool hasArrayJoin(const ASTPtr & ast) { if (const ASTFunction * function = ast->as()) @@ -1033,17 +926,6 @@ void TreeRewriter::normalize( CustomizeGlobalNotInVisitor(data_global_not_null_in).visit(query); } - // Try to fuse sum/avg/count with identical arguments to one sumCount call, - // if we have at least two different functions. E.g. we will replace sum(x) - // and count(x) with sumCount(x).1 and sumCount(x).2, and sumCount() will - // be calculated only once because of CSE. - if (settings.optimize_fuse_sum_count_avg) - { - FuseSumCountAggregatesVisitor::Data data; - FuseSumCountAggregatesVisitor(data).visit(query); - fuseSumCountAggregates(data.fuse_map); - } - /// Rewrite all aggregate functions to add -OrNull suffix to them if (settings.aggregate_functions_null_for_empty) { diff --git a/tests/queries/0_stateless/01744_fuse_sum_count_aggregate.sql b/tests/queries/0_stateless/01744_fuse_sum_count_aggregate.sql index cad7b5803d4..b2553e273fb 100644 --- a/tests/queries/0_stateless/01744_fuse_sum_count_aggregate.sql +++ b/tests/queries/0_stateless/01744_fuse_sum_count_aggregate.sql @@ -2,7 +2,7 @@ DROP TABLE IF EXISTS fuse_tbl; CREATE TABLE fuse_tbl(a Int8, b Int8) Engine = Log; INSERT INTO fuse_tbl SELECT number, number + 1 FROM numbers(1, 20); -SET optimize_fuse_sum_count_avg = 1; +SET optimize_fuse_functions= 1; SELECT sum(a), sum(b), count(b) from fuse_tbl; EXPLAIN SYNTAX SELECT sum(a), sum(b), count(b) from fuse_tbl; SELECT '---------NOT trigger fuse--------'; diff --git a/tests/queries/0_stateless/01956_fuse_quantile_optimization.sql b/tests/queries/0_stateless/01956_fuse_quantile_optimization.sql index 1f476740aef..0b9243ded19 100644 --- a/tests/queries/0_stateless/01956_fuse_quantile_optimization.sql +++ b/tests/queries/0_stateless/01956_fuse_quantile_optimization.sql @@ -14,7 +14,7 @@ SELECT quantileBFloat16(0.2)(d), quantileBFloat16(0.3)(d), quantileBFloat16(0.4) SELECT '---------After fuse result-----------'; -set optimize_fuse_quantile=true; +set optimize_fuse_functions=true; SELECT 'quantile:'; EXPLAIN SYNTAX SELECT quantile(0.2)(d), quantile(0.3)(d) FROM datetime; SELECT quantile(0.2)(d), quantile(0.3)(d) FROM datetime; From a3db56f05611e3248187c885004bd173f17d556c Mon Sep 17 00:00:00 2001 From: vdimir Date: 
Tue, 14 Sep 2021 15:12:21 +0300 Subject: [PATCH 31/80] Revert "fuse multi setting options into one when Optimize" This reverts commit bbd7799375ba4749a80bbd6ebe11d16033f2af54. --- src/Interpreters/TreeOptimizer.cpp | 129 +----------------- src/Interpreters/TreeRewriter.cpp | 118 ++++++++++++++++ .../01744_fuse_sum_count_aggregate.sql | 2 +- .../01956_fuse_quantile_optimization.sql | 2 +- 4 files changed, 123 insertions(+), 128 deletions(-) diff --git a/src/Interpreters/TreeOptimizer.cpp b/src/Interpreters/TreeOptimizer.cpp index 4d7e752aa7a..fb220cddc02 100644 --- a/src/Interpreters/TreeOptimizer.cpp +++ b/src/Interpreters/TreeOptimizer.cpp @@ -588,118 +588,10 @@ void optimizeFunctionsToSubcolumns(ASTPtr & query, const StorageMetadataPtr & me RewriteFunctionToSubcolumnVisitor(data).visit(query); } -struct FuseSumCountAggregates -{ - std::vector sums {}; - std::vector counts {}; - std::vector avgs {}; - - void addFuncNode(ASTFunction * func) - { - if (func->name == "sum") - sums.push_back(func); - else if (func->name == "count") - counts.push_back(func); - else - { - assert(func->name == "avg"); - avgs.push_back(func); - } - } - - bool canBeFused() const - { - // Need at least two different kinds of functions to fuse. - if (sums.empty() && counts.empty()) - return false; - if (sums.empty() && avgs.empty()) - return false; - if (counts.empty() && avgs.empty()) - return false; - return true; - } -}; - -struct FuseSumCountAggregatesVisitorData -{ - using TypeToVisit = ASTFunction; - - std::unordered_map fuse_map; - - void visit(ASTFunction & func, ASTPtr &) - { - if (func.name == "sum" || func.name == "avg" || func.name == "count") - { - if (func.arguments->children.empty()) - return; - - // Probably we can extend it to match count() for non-nullable argument - // to sum/avg with any other argument. Now we require strict match. - const auto argument = func.arguments->children.at(0)->getColumnName(); - auto it = fuse_map.find(argument); - if (it != fuse_map.end()) - { - it->second.addFuncNode(&func); - } - else - { - FuseSumCountAggregates funcs{}; - funcs.addFuncNode(&func); - fuse_map[argument] = funcs; - } - } - } -}; - -using FuseSumCountAggregatesVisitor = InDepthNodeVisitor, true>; - -// Replaces one avg/sum/count function with an appropriate expression with -// sumCount(). -void replaceWithSumCount(String column_name, ASTFunction & func) -{ - auto func_base = makeASTFunction("sumCount", std::make_shared(column_name)); - auto exp_list = std::make_shared(); - if (func.name == "sum" || func.name == "count") - { - /// Rewrite "sum" to sumCount().1, rewrite "count" to sumCount().2 - UInt8 idx = (func.name == "sum" ? 
1 : 2); - func.name = "tupleElement"; - exp_list->children.push_back(func_base); - exp_list->children.push_back(std::make_shared(idx)); - } - else - { - /// Rewrite "avg" to sumCount().1 / sumCount().2 - auto new_arg1 = makeASTFunction("tupleElement", func_base, std::make_shared(UInt8(1))); - auto new_arg2 = makeASTFunction("tupleElement", func_base, std::make_shared(UInt8(2))); - func.name = "divide"; - exp_list->children.push_back(new_arg1); - exp_list->children.push_back(new_arg2); - } - func.arguments = exp_list; - func.children.push_back(func.arguments); -} - -void fuseSumCountAggregates(std::unordered_map & fuse_map) -{ - for (auto & it : fuse_map) - { - if (it.second.canBeFused()) - { - for (auto & func: it.second.sums) - replaceWithSumCount(it.first, *func); - for (auto & func: it.second.avgs) - replaceWithSumCount(it.first, *func); - for (auto & func: it.second.counts) - replaceWithSumCount(it.first, *func); - } - } -} - /// Rewrites multi quantile()() functions with the same arguments to quantiles()()[] /// eg:SELECT quantile(0.5)(x), quantile(0.9)(x), quantile(0.95)(x) FROM... /// rewrite to : SELECT quantiles(0.5, 0.9, 0.95)(x)[1], quantiles(0.5, 0.9, 0.95)(x)[2], quantiles(0.5, 0.9, 0.95)(x)[3] FROM ... -void fuseQuantileCandidate(std::unordered_map & fuse_quantile) +void fuseCandidate(std::unordered_map & fuse_quantile) { for (const auto & candidate : fuse_quantile) { @@ -735,21 +627,13 @@ void fuseQuantileCandidate(std::unordered_map sums {}; + std::vector counts {}; + std::vector avgs {}; + + void addFuncNode(ASTFunction * func) + { + if (func->name == "sum") + sums.push_back(func); + else if (func->name == "count") + counts.push_back(func); + else + { + assert(func->name == "avg"); + avgs.push_back(func); + } + } + + bool canBeFused() const + { + // Need at least two different kinds of functions to fuse. + if (sums.empty() && counts.empty()) + return false; + if (sums.empty() && avgs.empty()) + return false; + if (counts.empty() && avgs.empty()) + return false; + return true; + } +}; + +struct FuseSumCountAggregatesVisitorData +{ + using TypeToVisit = ASTFunction; + + std::unordered_map fuse_map; + + void visit(ASTFunction & func, ASTPtr &) + { + if (func.name == "sum" || func.name == "avg" || func.name == "count") + { + if (func.arguments->children.empty()) + return; + + // Probably we can extend it to match count() for non-nullable argument + // to sum/avg with any other argument. Now we require strict match. + const auto argument = func.arguments->children.at(0)->getColumnName(); + auto it = fuse_map.find(argument); + if (it != fuse_map.end()) + { + it->second.addFuncNode(&func); + } + else + { + FuseSumCountAggregates funcs{}; + funcs.addFuncNode(&func); + fuse_map[argument] = funcs; + } + } + } +}; + using CustomizeAggregateFunctionsOrNullVisitor = InDepthNodeVisitor, true>; using CustomizeAggregateFunctionsMoveOrNullVisitor = InDepthNodeVisitor, true>; +using FuseSumCountAggregatesVisitor = InDepthNodeVisitor, true>; /// Translate qualified names such as db.table.column, table.column, table_alias.column to names' normal form. /// Expand asterisks and qualified asterisks with column names. @@ -199,6 +263,49 @@ void translateQualifiedNames(ASTPtr & query, const ASTSelectQuery & select_query throw Exception("Empty list of columns in SELECT query", ErrorCodes::EMPTY_LIST_OF_COLUMNS_QUERIED); } +// Replaces one avg/sum/count function with an appropriate expression with +// sumCount(). 
+void replaceWithSumCount(String column_name, ASTFunction & func) +{ + auto func_base = makeASTFunction("sumCount", std::make_shared(column_name)); + auto exp_list = std::make_shared(); + if (func.name == "sum" || func.name == "count") + { + /// Rewrite "sum" to sumCount().1, rewrite "count" to sumCount().2 + UInt8 idx = (func.name == "sum" ? 1 : 2); + func.name = "tupleElement"; + exp_list->children.push_back(func_base); + exp_list->children.push_back(std::make_shared(idx)); + } + else + { + /// Rewrite "avg" to sumCount().1 / sumCount().2 + auto new_arg1 = makeASTFunction("tupleElement", func_base, std::make_shared(UInt8(1))); + auto new_arg2 = makeASTFunction("tupleElement", func_base, std::make_shared(UInt8(2))); + func.name = "divide"; + exp_list->children.push_back(new_arg1); + exp_list->children.push_back(new_arg2); + } + func.arguments = exp_list; + func.children.push_back(func.arguments); +} + +void fuseSumCountAggregates(std::unordered_map & fuse_map) +{ + for (auto & it : fuse_map) + { + if (it.second.canBeFused()) + { + for (auto & func: it.second.sums) + replaceWithSumCount(it.first, *func); + for (auto & func: it.second.avgs) + replaceWithSumCount(it.first, *func); + for (auto & func: it.second.counts) + replaceWithSumCount(it.first, *func); + } + } +} + bool hasArrayJoin(const ASTPtr & ast) { if (const ASTFunction * function = ast->as()) @@ -926,6 +1033,17 @@ void TreeRewriter::normalize( CustomizeGlobalNotInVisitor(data_global_not_null_in).visit(query); } + // Try to fuse sum/avg/count with identical arguments to one sumCount call, + // if we have at least two different functions. E.g. we will replace sum(x) + // and count(x) with sumCount(x).1 and sumCount(x).2, and sumCount() will + // be calculated only once because of CSE. + if (settings.optimize_fuse_sum_count_avg) + { + FuseSumCountAggregatesVisitor::Data data; + FuseSumCountAggregatesVisitor(data).visit(query); + fuseSumCountAggregates(data.fuse_map); + } + /// Rewrite all aggregate functions to add -OrNull suffix to them if (settings.aggregate_functions_null_for_empty) { diff --git a/tests/queries/0_stateless/01744_fuse_sum_count_aggregate.sql b/tests/queries/0_stateless/01744_fuse_sum_count_aggregate.sql index b2553e273fb..cad7b5803d4 100644 --- a/tests/queries/0_stateless/01744_fuse_sum_count_aggregate.sql +++ b/tests/queries/0_stateless/01744_fuse_sum_count_aggregate.sql @@ -2,7 +2,7 @@ DROP TABLE IF EXISTS fuse_tbl; CREATE TABLE fuse_tbl(a Int8, b Int8) Engine = Log; INSERT INTO fuse_tbl SELECT number, number + 1 FROM numbers(1, 20); -SET optimize_fuse_functions= 1; +SET optimize_fuse_sum_count_avg = 1; SELECT sum(a), sum(b), count(b) from fuse_tbl; EXPLAIN SYNTAX SELECT sum(a), sum(b), count(b) from fuse_tbl; SELECT '---------NOT trigger fuse--------'; diff --git a/tests/queries/0_stateless/01956_fuse_quantile_optimization.sql b/tests/queries/0_stateless/01956_fuse_quantile_optimization.sql index 0b9243ded19..1f476740aef 100644 --- a/tests/queries/0_stateless/01956_fuse_quantile_optimization.sql +++ b/tests/queries/0_stateless/01956_fuse_quantile_optimization.sql @@ -14,7 +14,7 @@ SELECT quantileBFloat16(0.2)(d), quantileBFloat16(0.3)(d), quantileBFloat16(0.4) SELECT '---------After fuse result-----------'; -set optimize_fuse_functions=true; +set optimize_fuse_quantile=true; SELECT 'quantile:'; EXPLAIN SYNTAX SELECT quantile(0.2)(d), quantile(0.3)(d) FROM datetime; SELECT quantile(0.2)(d), quantile(0.3)(d) FROM datetime; From 5a1aeeb04409f53acaf9c35049e86a04ac9b5679 Mon Sep 17 00:00:00 2001 From: vdimir Date: 
Tue, 14 Sep 2021 15:21:16 +0300 Subject: [PATCH 32/80] Use `optimize_syntax_fuse_aggregate` instead of `optimize_fuse_sum_count_avg` and `optimize_fuse_quantile` --- docs/en/operations/settings/settings.md | 4 ++-- .../aggregate-functions/reference/sumcount.md | 2 +- docs/ru/operations/settings/settings.md | 4 ++-- .../aggregate-functions/reference/sumcount.md | 2 +- src/Core/Settings.h | 4 ++-- src/Interpreters/TreeOptimizer.cpp | 2 +- src/Interpreters/TreeRewriter.cpp | 2 +- tests/performance/fuse_sumcount.xml | 10 +++++----- .../0_stateless/01744_fuse_sum_count_aggregate.sql | 2 +- .../0_stateless/01956_fuse_quantile_optimization.sql | 2 +- 10 files changed, 17 insertions(+), 17 deletions(-) diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 1441e90b33f..452e6644968 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -3109,7 +3109,7 @@ Result: └─────┘ ``` -## optimize_fuse_sum_count_avg {#optimize_fuse_sum_count_avg} +## optimize_syntax_fuse_aggregate {#optimize_syntax_fuse_aggregate} Enables to fuse aggregate functions with identical argument. It rewrites query contains at least two aggregate functions from [sum](../../sql-reference/aggregate-functions/reference/sum.md#agg_function-sum), [count](../../sql-reference/aggregate-functions/reference/count.md#agg_function-count) or [avg](../../sql-reference/aggregate-functions/reference/avg.md#agg_function-avg) with identical argument to [sumCount](../../sql-reference/aggregate-functions/reference/sumcount.md#agg_function-sumCount). @@ -3126,7 +3126,7 @@ Query: ``` sql CREATE TABLE fuse_tbl(a Int8, b Int8) Engine = Log; -SET optimize_fuse_sum_count_avg = 1; +SET optimize_syntax_fuse_aggregate = 1; EXPLAIN SYNTAX SELECT sum(a), sum(b), count(b), avg(b) from fuse_tbl FORMAT TSV; ``` diff --git a/docs/en/sql-reference/aggregate-functions/reference/sumcount.md b/docs/en/sql-reference/aggregate-functions/reference/sumcount.md index b2cb2cfdc09..da0fe8099e8 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/sumcount.md +++ b/docs/en/sql-reference/aggregate-functions/reference/sumcount.md @@ -43,4 +43,4 @@ Result: **See also** -- [optimize_fuse_sum_count_avg](../../../operations/settings/settings.md#optimize_fuse_sum_count_avg) setting. +- [optimize_syntax_fuse_aggregate](../../../operations/settings/settings.md#optimize_syntax_fuse_aggregate) setting. diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md index 9e926a63c62..cab2f74c9ed 100644 --- a/docs/ru/operations/settings/settings.md +++ b/docs/ru/operations/settings/settings.md @@ -2954,7 +2954,7 @@ SELECT * FROM test LIMIT 10 OFFSET 100; Значение по умолчанию: `1800`. -## optimize_fuse_sum_count_avg {#optimize_fuse_sum_count_avg} +## optimize_syntax_fuse_aggregate {#optimize_syntax_fuse_aggregate} Позволяет объединить агрегатные функции с одинаковым аргументом. Запрос, содержащий по крайней мере две агрегатные функции: [sum](../../sql-reference/aggregate-functions/reference/sum.md#agg_function-sum), [count](../../sql-reference/aggregate-functions/reference/count.md#agg_function-count) или [avg](../../sql-reference/aggregate-functions/reference/avg.md#agg_function-avg) с одинаковым аргументом, перезаписывается как [sumCount](../../sql-reference/aggregate-functions/reference/sumcount.md#agg_function-sumCount). 
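The unified setting also covers the quantile-family fusion introduced earlier in the series: quantile(0.2)(d), quantile(0.3)(d) becomes quantiles(0.2, 0.3)(d)[1] and quantiles(0.2, 0.3)(d)[2], so the underlying quantile state is built once per distinct argument. A toy value-level analogue using nearest-rank picks; ClickHouse's actual quantile algorithms interpolate differently, and its arrays are 1-based unlike the 0-based indexing below:

#include <algorithm>
#include <cassert>
#include <vector>

std::vector<double> quantiles(std::vector<double> xs, const std::vector<double> & levels)
{
    std::sort(xs.begin(), xs.end());   /// one sorted copy serves every requested level
    std::vector<double> result;
    for (double level : levels)
        result.push_back(xs[std::min(xs.size() - 1, static_cast<size_t>(level * xs.size()))]);
    return result;
}

int main()
{
    std::vector<double> d{1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
    auto q = quantiles(d, {0.25, 0.5});   /// levels chosen to be exactly representable in binary
    assert(q[0] == 3 && q[1] == 6);
}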
@@ -2971,7 +2971,7 @@ SELECT * FROM test LIMIT 10 OFFSET 100; ``` sql CREATE TABLE fuse_tbl(a Int8, b Int8) Engine = Log; -SET optimize_fuse_sum_count_avg = 1; +SET optimize_syntax_fuse_aggregate = 1; EXPLAIN SYNTAX SELECT sum(a), sum(b), count(b), avg(b) from fuse_tbl FORMAT TSV; ``` diff --git a/docs/ru/sql-reference/aggregate-functions/reference/sumcount.md b/docs/ru/sql-reference/aggregate-functions/reference/sumcount.md index 0606b06fba0..06aa140a50e 100644 --- a/docs/ru/sql-reference/aggregate-functions/reference/sumcount.md +++ b/docs/ru/sql-reference/aggregate-functions/reference/sumcount.md @@ -43,4 +43,4 @@ SELECT sumCount(x) from s_table; **Смотрите также** -- Настройка [optimize_fuse_sum_count_avg](../../../operations/settings/settings.md#optimize_fuse_sum_count_avg) +- Настройка [optimize_syntax_fuse_aggregate](../../../operations/settings/settings.md#optimize_syntax_fuse_aggregate) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index fe9aa44e9f7..b3c9a0e2976 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -439,8 +439,7 @@ class IColumn; M(Bool, allow_non_metadata_alters, true, "Allow to execute alters which affects not only tables metadata, but also data on disk", 0) \ M(Bool, enable_global_with_statement, true, "Propagate WITH statements to UNION queries and all subqueries", 0) \ M(Bool, aggregate_functions_null_for_empty, false, "Rewrite all aggregate functions in a query, adding -OrNull suffix to them", 0) \ - M(Bool, optimize_fuse_sum_count_avg, false, "Fuse aggregate functions sum(), avg(), count() with identical arguments into one sumCount() call, if the query has at least two different functions", 0) \ - M(Bool, optimize_fuse_quantile, false, "Fuse multiply quantile-family functions with the same argument into quantilesXXX()[]", 0) \ + M(Bool, optimize_syntax_fuse_aggregate, false, "Fuse aggregate functions (`sum, avg, count` with identical arguments into one `sumCount`, quantile-family functions with the same argument into `quantiles*(...)[...]`)", 0) \ M(Bool, flatten_nested, true, "If true, columns of type Nested will be flatten to separate array columns instead of one array of tuples", 0) \ M(Bool, asterisk_include_materialized_columns, false, "Include MATERIALIZED columns for wildcard query", 0) \ M(Bool, asterisk_include_alias_columns, false, "Include ALIAS columns for wildcard query", 0) \ @@ -497,6 +496,7 @@ class IColumn; M(Bool, allow_experimental_bigint_types, true, "Obsolete setting, does nothing.", 0) \ M(HandleKafkaErrorMode, handle_kafka_error_mode, HandleKafkaErrorMode::DEFAULT, "Obsolete setting, does nothing.", 0) \ M(Bool, database_replicated_ddl_output, true, "Obsolete setting, does nothing.", 0) \ + M(Bool, optimize_fuse_sum_count_avg, false, "Obsolete, use optimize_syntax_fuse_aggregate", 0) \ /** The section above is for obsolete settings. Do not add anything there. */ diff --git a/src/Interpreters/TreeOptimizer.cpp b/src/Interpreters/TreeOptimizer.cpp index fb220cddc02..f75b8dae83b 100644 --- a/src/Interpreters/TreeOptimizer.cpp +++ b/src/Interpreters/TreeOptimizer.cpp @@ -732,7 +732,7 @@ void TreeOptimizer::apply(ASTPtr & query, TreeRewriterResult & result, /// Remove duplicated columns from USING(...). 
optimizeUsing(select_query); - if (settings.optimize_fuse_quantile) + if (settings.optimize_syntax_fuse_aggregate) optimizeFuseQuantileFunctions(query); } diff --git a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp index 2bdad8b698f..371a9d81f24 100644 --- a/src/Interpreters/TreeRewriter.cpp +++ b/src/Interpreters/TreeRewriter.cpp @@ -1037,7 +1037,7 @@ void TreeRewriter::normalize( // if we have at least two different functions. E.g. we will replace sum(x) // and count(x) with sumCount(x).1 and sumCount(x).2, and sumCount() will // be calculated only once because of CSE. - if (settings.optimize_fuse_sum_count_avg) + if (settings.optimize_fuse_sum_count_avg || settings.optimize_syntax_fuse_aggregate) { FuseSumCountAggregatesVisitor::Data data; FuseSumCountAggregatesVisitor(data).visit(query); diff --git a/tests/performance/fuse_sumcount.xml b/tests/performance/fuse_sumcount.xml index b2eb0e678e2..05e715868ea 100644 --- a/tests/performance/fuse_sumcount.xml +++ b/tests/performance/fuse_sumcount.xml @@ -6,7 +6,7 @@ Also test GROUP BY with and without keys, because they might have different optimizations. --> - 1 + 1 @@ -21,13 +21,13 @@ SELECT sum(number) FROM numbers(1000000000) FORMAT Null SELECT sum(number), count(number) FROM numbers(1000000000) FORMAT Null - SELECT sum(number), count(number) FROM numbers(1000000000) SETTINGS optimize_fuse_sum_count_avg = 0 FORMAT Null + SELECT sum(number), count(number) FROM numbers(1000000000) SETTINGS optimize_syntax_fuse_aggregate = 0 FORMAT Null SELECT sum(number), avg(number), count(number) FROM numbers(1000000000) FORMAT Null - SELECT sum(number), avg(number), count(number) FROM numbers(1000000000) SETTINGS optimize_fuse_sum_count_avg = 0 FORMAT Null + SELECT sum(number), avg(number), count(number) FROM numbers(1000000000) SETTINGS optimize_syntax_fuse_aggregate = 0 FORMAT Null SELECT sum(number) FROM numbers(100000000) GROUP BY intHash32(number) % 1000 FORMAT Null SELECT sum(number), count(number) FROM numbers(100000000) GROUP BY intHash32(number) % 1000 FORMAT Null - SELECT sum(number), count(number) FROM numbers(100000000) GROUP BY intHash32(number) % 1000 SETTINGS optimize_fuse_sum_count_avg = 0 FORMAT Null + SELECT sum(number), count(number) FROM numbers(100000000) GROUP BY intHash32(number) % 1000 SETTINGS optimize_syntax_fuse_aggregate = 0 FORMAT Null SELECT sum(number), avg(number), count(number) FROM numbers(100000000) GROUP BY intHash32(number) % 1000 FORMAT Null - SELECT sum(number), avg(number), count(number) FROM numbers(100000000) GROUP BY intHash32(number) % 1000 SETTINGS optimize_fuse_sum_count_avg = 0 FORMAT Null + SELECT sum(number), avg(number), count(number) FROM numbers(100000000) GROUP BY intHash32(number) % 1000 SETTINGS optimize_syntax_fuse_aggregate = 0 FORMAT Null diff --git a/tests/queries/0_stateless/01744_fuse_sum_count_aggregate.sql b/tests/queries/0_stateless/01744_fuse_sum_count_aggregate.sql index cad7b5803d4..3cb54e074aa 100644 --- a/tests/queries/0_stateless/01744_fuse_sum_count_aggregate.sql +++ b/tests/queries/0_stateless/01744_fuse_sum_count_aggregate.sql @@ -2,7 +2,7 @@ DROP TABLE IF EXISTS fuse_tbl; CREATE TABLE fuse_tbl(a Int8, b Int8) Engine = Log; INSERT INTO fuse_tbl SELECT number, number + 1 FROM numbers(1, 20); -SET optimize_fuse_sum_count_avg = 1; +SET optimize_syntax_fuse_aggregate = 1; SELECT sum(a), sum(b), count(b) from fuse_tbl; EXPLAIN SYNTAX SELECT sum(a), sum(b), count(b) from fuse_tbl; SELECT '---------NOT trigger fuse--------'; diff --git 
a/tests/queries/0_stateless/01956_fuse_quantile_optimization.sql b/tests/queries/0_stateless/01956_fuse_quantile_optimization.sql index 1f476740aef..197ef8a2cda 100644 --- a/tests/queries/0_stateless/01956_fuse_quantile_optimization.sql +++ b/tests/queries/0_stateless/01956_fuse_quantile_optimization.sql @@ -14,7 +14,7 @@ SELECT quantileBFloat16(0.2)(d), quantileBFloat16(0.3)(d), quantileBFloat16(0.4) SELECT '---------After fuse result-----------'; -set optimize_fuse_quantile=true; +SET optimize_syntax_fuse_aggregate = true; SELECT 'quantile:'; EXPLAIN SYNTAX SELECT quantile(0.2)(d), quantile(0.3)(d) FROM datetime; SELECT quantile(0.2)(d), quantile(0.3)(d) FROM datetime; From a3304a87a497e78badfc4fab61595587b0052660 Mon Sep 17 00:00:00 2001 From: vdimir Date: Tue, 14 Sep 2021 15:27:12 +0300 Subject: [PATCH 33/80] Rename optimize_syntax_fuse_aggregate -> optimize_syntax_fuse_functions --- docs/en/operations/settings/settings.md | 4 ++-- .../aggregate-functions/reference/sumcount.md | 2 +- docs/ru/operations/settings/settings.md | 6 +++--- .../aggregate-functions/reference/sumcount.md | 2 +- src/Core/Settings.h | 4 ++-- src/Interpreters/TreeOptimizer.cpp | 2 +- src/Interpreters/TreeRewriter.cpp | 2 +- tests/performance/fuse_sumcount.xml | 10 +++++----- .../0_stateless/01744_fuse_sum_count_aggregate.sql | 2 +- .../0_stateless/01956_fuse_quantile_optimization.sql | 2 +- 10 files changed, 18 insertions(+), 18 deletions(-) diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 5ed40b03705..93afb5a8fa3 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -3306,7 +3306,7 @@ Result: └─────┘ ``` -## optimize_syntax_fuse_aggregate {#optimize_syntax_fuse_aggregate} +## optimize_syntax_fuse_functions {#optimize_syntax_fuse_functions} Enables to fuse aggregate functions with identical argument. It rewrites query contains at least two aggregate functions from [sum](../../sql-reference/aggregate-functions/reference/sum.md#agg_function-sum), [count](../../sql-reference/aggregate-functions/reference/count.md#agg_function-count) or [avg](../../sql-reference/aggregate-functions/reference/avg.md#agg_function-avg) with identical argument to [sumCount](../../sql-reference/aggregate-functions/reference/sumcount.md#agg_function-sumCount). @@ -3323,7 +3323,7 @@ Query: ``` sql CREATE TABLE fuse_tbl(a Int8, b Int8) Engine = Log; -SET optimize_syntax_fuse_aggregate = 1; +SET optimize_syntax_fuse_functions = 1; EXPLAIN SYNTAX SELECT sum(a), sum(b), count(b), avg(b) from fuse_tbl FORMAT TSV; ``` diff --git a/docs/en/sql-reference/aggregate-functions/reference/sumcount.md b/docs/en/sql-reference/aggregate-functions/reference/sumcount.md index 417554084b2..00a7a9fc9f1 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/sumcount.md +++ b/docs/en/sql-reference/aggregate-functions/reference/sumcount.md @@ -43,4 +43,4 @@ Result: **See also** -- [optimize_syntax_fuse_aggregate](../../../operations/settings/settings.md#optimize_syntax_fuse_aggregate) setting. +- [optimize_syntax_fuse_functions](../../../operations/settings/settings.md#optimize_syntax_fuse_functions) setting. diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md index a0b81121624..82a4b0bc1cd 100644 --- a/docs/ru/operations/settings/settings.md +++ b/docs/ru/operations/settings/settings.md @@ -3122,7 +3122,7 @@ SELECT * FROM test LIMIT 10 OFFSET 100; Значение по умолчанию: `1800`. 
-## optimize_syntax_fuse_aggregate {#optimize_syntax_fuse_aggregate} +## optimize_syntax_fuse_functions {#optimize_syntax_fuse_functions} Позволяет объединить агрегатные функции с одинаковым аргументом. Запрос, содержащий по крайней мере две агрегатные функции: [sum](../../sql-reference/aggregate-functions/reference/sum.md#agg_function-sum), [count](../../sql-reference/aggregate-functions/reference/count.md#agg_function-count) или [avg](../../sql-reference/aggregate-functions/reference/avg.md#agg_function-avg) с одинаковым аргументом, перезаписывается как [sumCount](../../sql-reference/aggregate-functions/reference/sumcount.md#agg_function-sumCount). @@ -3139,7 +3139,7 @@ SELECT * FROM test LIMIT 10 OFFSET 100; ``` sql CREATE TABLE fuse_tbl(a Int8, b Int8) Engine = Log; -SET optimize_syntax_fuse_aggregate = 1; +SET optimize_syntax_fuse_functions = 1; EXPLAIN SYNTAX SELECT sum(a), sum(b), count(b), avg(b) from fuse_tbl FORMAT TSV; ``` @@ -3333,7 +3333,7 @@ SETTINGS index_granularity = 8192 │ ## force_optimize_projection {#force-optimize-projection} -Включает или отключает обязательное использование [проекций](../../engines/table-engines/mergetree-family/mergetree.md#projections) в запросах `SELECT`, если поддержка проекций включена (см. настройку [allow_experimental_projection_optimization](#allow-experimental-projection-optimization)). +Включает или отключает обязательное использование [проекций](../../engines/table-engines/mergetree-family/mergetree.md#projections) в запросах `SELECT`, если поддержка проекций включена (см. настройку [allow_experimental_projection_optimization](#allow-experimental-projection-optimization)). Возможные значения: diff --git a/docs/ru/sql-reference/aggregate-functions/reference/sumcount.md b/docs/ru/sql-reference/aggregate-functions/reference/sumcount.md index 5cab33ace6d..ac721577a9a 100644 --- a/docs/ru/sql-reference/aggregate-functions/reference/sumcount.md +++ b/docs/ru/sql-reference/aggregate-functions/reference/sumcount.md @@ -43,4 +43,4 @@ SELECT sumCount(x) from s_table; **Смотрите также** -- Настройка [optimize_syntax_fuse_aggregate](../../../operations/settings/settings.md#optimize_syntax_fuse_aggregate) +- Настройка [optimize_syntax_fuse_functions](../../../operations/settings/settings.md#optimize_syntax_fuse_functions) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 2c1ab0d26c2..bb71273ae25 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -455,7 +455,7 @@ class IColumn; M(Bool, allow_non_metadata_alters, true, "Allow to execute alters which affects not only tables metadata, but also data on disk", 0) \ M(Bool, enable_global_with_statement, true, "Propagate WITH statements to UNION queries and all subqueries", 0) \ M(Bool, aggregate_functions_null_for_empty, false, "Rewrite all aggregate functions in a query, adding -OrNull suffix to them", 0) \ - M(Bool, optimize_syntax_fuse_aggregate, false, "Fuse aggregate functions (`sum, avg, count` with identical arguments into one `sumCount`, quantile-family functions with the same argument into `quantiles*(...)[...]`)", 0) \ + M(Bool, optimize_syntax_fuse_functions, false, "Fuse aggregate functions (`sum, avg, count` with identical arguments into one `sumCount`, quantile-family functions with the same argument into `quantiles*(...)[...]`)", 0) \ M(Bool, flatten_nested, true, "If true, columns of type Nested will be flatten to separate array columns instead of one array of tuples", 0) \ M(Bool, asterisk_include_materialized_columns, false, "Include MATERIALIZED columns for wildcard 
query", 0) \ M(Bool, asterisk_include_alias_columns, false, "Include ALIAS columns for wildcard query", 0) \ @@ -521,7 +521,7 @@ class IColumn; M(HandleKafkaErrorMode, handle_kafka_error_mode, HandleKafkaErrorMode::DEFAULT, "Obsolete setting, does nothing.", 0) \ M(Bool, database_replicated_ddl_output, true, "Obsolete setting, does nothing.", 0) \ M(UInt64, replication_alter_columns_timeout, 60, "Obsolete setting, does nothing.", 0) \ - M(Bool, optimize_fuse_sum_count_avg, false, "Obsolete, use optimize_syntax_fuse_aggregate", 0) \ + M(Bool, optimize_fuse_sum_count_avg, false, "Obsolete, use optimize_syntax_fuse_functions", 0) \ /** The section above is for obsolete settings. Do not add anything there. */ diff --git a/src/Interpreters/TreeOptimizer.cpp b/src/Interpreters/TreeOptimizer.cpp index 38d8bdd081a..2cee9fe356f 100644 --- a/src/Interpreters/TreeOptimizer.cpp +++ b/src/Interpreters/TreeOptimizer.cpp @@ -776,7 +776,7 @@ void TreeOptimizer::apply(ASTPtr & query, TreeRewriterResult & result, /// Remove duplicated columns from USING(...). optimizeUsing(select_query); - if (settings.optimize_syntax_fuse_aggregate) + if (settings.optimize_syntax_fuse_functions) optimizeFuseQuantileFunctions(query); } diff --git a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp index bdeb00490c3..96dc6522b73 100644 --- a/src/Interpreters/TreeRewriter.cpp +++ b/src/Interpreters/TreeRewriter.cpp @@ -1068,7 +1068,7 @@ void TreeRewriter::normalize( // if we have at least two different functions. E.g. we will replace sum(x) // and count(x) with sumCount(x).1 and sumCount(x).2, and sumCount() will // be calculated only once because of CSE. - if (settings.optimize_fuse_sum_count_avg || settings.optimize_syntax_fuse_aggregate) + if (settings.optimize_fuse_sum_count_avg || settings.optimize_syntax_fuse_functions) { FuseSumCountAggregatesVisitor::Data data; FuseSumCountAggregatesVisitor(data).visit(query); diff --git a/tests/performance/fuse_sumcount.xml b/tests/performance/fuse_sumcount.xml index 05e715868ea..237edb1b970 100644 --- a/tests/performance/fuse_sumcount.xml +++ b/tests/performance/fuse_sumcount.xml @@ -6,7 +6,7 @@ Also test GROUP BY with and without keys, because they might have different optimizations. 
--> - 1 + 1 @@ -21,13 +21,13 @@ SELECT sum(number) FROM numbers(1000000000) FORMAT Null SELECT sum(number), count(number) FROM numbers(1000000000) FORMAT Null - SELECT sum(number), count(number) FROM numbers(1000000000) SETTINGS optimize_syntax_fuse_aggregate = 0 FORMAT Null + SELECT sum(number), count(number) FROM numbers(1000000000) SETTINGS optimize_syntax_fuse_functions = 0 FORMAT Null SELECT sum(number), avg(number), count(number) FROM numbers(1000000000) FORMAT Null - SELECT sum(number), avg(number), count(number) FROM numbers(1000000000) SETTINGS optimize_syntax_fuse_aggregate = 0 FORMAT Null + SELECT sum(number), avg(number), count(number) FROM numbers(1000000000) SETTINGS optimize_syntax_fuse_functions = 0 FORMAT Null SELECT sum(number) FROM numbers(100000000) GROUP BY intHash32(number) % 1000 FORMAT Null SELECT sum(number), count(number) FROM numbers(100000000) GROUP BY intHash32(number) % 1000 FORMAT Null - SELECT sum(number), count(number) FROM numbers(100000000) GROUP BY intHash32(number) % 1000 SETTINGS optimize_syntax_fuse_aggregate = 0 FORMAT Null + SELECT sum(number), count(number) FROM numbers(100000000) GROUP BY intHash32(number) % 1000 SETTINGS optimize_syntax_fuse_functions = 0 FORMAT Null SELECT sum(number), avg(number), count(number) FROM numbers(100000000) GROUP BY intHash32(number) % 1000 FORMAT Null - SELECT sum(number), avg(number), count(number) FROM numbers(100000000) GROUP BY intHash32(number) % 1000 SETTINGS optimize_syntax_fuse_aggregate = 0 FORMAT Null + SELECT sum(number), avg(number), count(number) FROM numbers(100000000) GROUP BY intHash32(number) % 1000 SETTINGS optimize_syntax_fuse_functions = 0 FORMAT Null diff --git a/tests/queries/0_stateless/01744_fuse_sum_count_aggregate.sql b/tests/queries/0_stateless/01744_fuse_sum_count_aggregate.sql index 3cb54e074aa..4648889ca27 100644 --- a/tests/queries/0_stateless/01744_fuse_sum_count_aggregate.sql +++ b/tests/queries/0_stateless/01744_fuse_sum_count_aggregate.sql @@ -2,7 +2,7 @@ DROP TABLE IF EXISTS fuse_tbl; CREATE TABLE fuse_tbl(a Int8, b Int8) Engine = Log; INSERT INTO fuse_tbl SELECT number, number + 1 FROM numbers(1, 20); -SET optimize_syntax_fuse_aggregate = 1; +SET optimize_syntax_fuse_functions = 1; SELECT sum(a), sum(b), count(b) from fuse_tbl; EXPLAIN SYNTAX SELECT sum(a), sum(b), count(b) from fuse_tbl; SELECT '---------NOT trigger fuse--------'; diff --git a/tests/queries/0_stateless/01956_fuse_quantile_optimization.sql b/tests/queries/0_stateless/01956_fuse_quantile_optimization.sql index 197ef8a2cda..304aa803bcc 100644 --- a/tests/queries/0_stateless/01956_fuse_quantile_optimization.sql +++ b/tests/queries/0_stateless/01956_fuse_quantile_optimization.sql @@ -14,7 +14,7 @@ SELECT quantileBFloat16(0.2)(d), quantileBFloat16(0.3)(d), quantileBFloat16(0.4) SELECT '---------After fuse result-----------'; -SET optimize_syntax_fuse_aggregate = true; +SET optimize_syntax_fuse_functions = true; SELECT 'quantile:'; EXPLAIN SYNTAX SELECT quantile(0.2)(d), quantile(0.3)(d) FROM datetime; SELECT quantile(0.2)(d), quantile(0.3)(d) FROM datetime; From ba33fbbf1d026c5b3d312ba3b7ed6cbc61c24fd1 Mon Sep 17 00:00:00 2001 From: zhangxiao871 Date: Wed, 15 Sep 2021 01:14:14 +0800 Subject: [PATCH 34/80] add the last logfile that is less than the zxid. 
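
ZooKeeper names each transaction log file after the zxid of its first
entry, so a log whose starting zxid is below storage.zxid can still
contain transactions at or above it. Previously such a log was skipped
entirely and its newer transactions were lost. Now keep every log that
starts at or above storage.zxid, plus the newest log that starts below
it.

A worked example (starting zxids are illustrative): with logs starting
at zxids 100, 200 and 300 and storage.zxid = 250, the logs starting at
300 and 200 are kept, because transactions 250..299 live in the log
that starts at 200; the log starting at 100 is skipped.
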
--- src/Coordination/ZooKeeperDataReader.cpp | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/src/Coordination/ZooKeeperDataReader.cpp b/src/Coordination/ZooKeeperDataReader.cpp index cf644110786..2295109a236 100644 --- a/src/Coordination/ZooKeeperDataReader.cpp +++ b/src/Coordination/ZooKeeperDataReader.cpp @@ -572,12 +572,24 @@ void deserializeLogsAndApplyToStorage(KeeperStorage & storage, const std::string LOG_INFO(log, "Totally have {} logs", existing_logs.size()); - for (auto [zxid, log_path] : existing_logs) + std::vector stored_files; + for (auto it = existing_logs.rbegin(); it != existing_logs.rend(); ++it) { - if (zxid > storage.zxid) - deserializeLogAndApplyToStorage(storage, log_path, log); - else - LOG_INFO(log, "Skipping log {}, it's ZXID {} is smaller than storages ZXID {}", log_path, zxid, storage.zxid); + if (it->first >= storage.zxid) + { + stored_files.emplace_back(it->second); + } + else if (it->first < storage.zxid) + { + /// add the last logfile that is less than the zxid + stored_files.emplace_back(it->second); + break; + } + } + + for (auto & log_path : stored_files) + { + deserializeLogAndApplyToStorage(storage, log_path, log); } } From c86832b1d794b0958bf5898cd6319d6707aa8c81 Mon Sep 17 00:00:00 2001 From: zhangxiao871 Date: Wed, 15 Sep 2021 01:19:19 +0800 Subject: [PATCH 35/80] fix deserialize log order. --- src/Coordination/ZooKeeperDataReader.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Coordination/ZooKeeperDataReader.cpp b/src/Coordination/ZooKeeperDataReader.cpp index 2295109a236..bb2dd6b168f 100644 --- a/src/Coordination/ZooKeeperDataReader.cpp +++ b/src/Coordination/ZooKeeperDataReader.cpp @@ -587,9 +587,9 @@ void deserializeLogsAndApplyToStorage(KeeperStorage & storage, const std::string } } - for (auto & log_path : stored_files) + for (auto it = stored_files.rbegin(); it != stored_files.rend(); ++it) { - deserializeLogAndApplyToStorage(storage, log_path, log); + deserializeLogAndApplyToStorage(storage, *it, log); } } From cd7fb8132dc774c872f05a12b1e0a774bedec4ee Mon Sep 17 00:00:00 2001 From: vdimir Date: Wed, 15 Sep 2021 15:15:42 +0300 Subject: [PATCH 36/80] Split GatherFunctionQuantileVisitor to h/cpp, fix fuzzed bugs --- .../GatherFunctionQuantileVisitor.cpp | 70 +++++++++++++++++++ .../GatherFunctionQuantileVisitor.h | 40 ++--------- src/Interpreters/TreeOptimizer.cpp | 49 +++++++------ src/Interpreters/ya.make | 1 + .../01956_fuse_quantile_optimization.sql | 8 ++- 5 files changed, 110 insertions(+), 58 deletions(-) create mode 100644 src/Interpreters/GatherFunctionQuantileVisitor.cpp diff --git a/src/Interpreters/GatherFunctionQuantileVisitor.cpp b/src/Interpreters/GatherFunctionQuantileVisitor.cpp new file mode 100644 index 00000000000..9e34d06eda9 --- /dev/null +++ b/src/Interpreters/GatherFunctionQuantileVisitor.cpp @@ -0,0 +1,70 @@ +#include +#include +#include +#include + +namespace DB +{ + +/// Mapping from quantile functions for single value to plural +static const std::unordered_map quantile_fuse_name_mapping = { + {NameQuantile::name, NameQuantiles::name}, + {NameQuantileDeterministic::name, NameQuantilesDeterministic::name}, + {NameQuantileExact::name, NameQuantilesExact::name}, + {NameQuantileExactLow::name, NameQuantilesExactLow::name}, + {NameQuantileExactHigh::name, NameQuantilesExactHigh::name}, + {NameQuantileExactExclusive::name, NameQuantilesExactExclusive::name}, + {NameQuantileExactInclusive::name, NameQuantilesExactInclusive::name}, + 
{NameQuantileExactWeighted::name, NameQuantilesExactWeighted::name}, + {NameQuantileTiming::name, NameQuantilesTiming::name}, + {NameQuantileTimingWeighted::name, NameQuantilesTimingWeighted::name}, + {NameQuantileTDigest::name, NameQuantilesTDigest::name}, + {NameQuantileTDigestWeighted::name, NameQuantilesTDigestWeighted::name}, + {NameQuantileBFloat16::name, NameQuantilesBFloat16::name} +}; + +String GatherFunctionQuantileData::getFusedName(const String & func_name) +{ + if (auto it = quantile_fuse_name_mapping.find(func_name); it != quantile_fuse_name_mapping.end()) + return it->second; + throw DB::Exception(ErrorCodes::LOGICAL_ERROR, "Function '{}' is not quantile-family or cannot be fused", func_name); +} + +void GatherFunctionQuantileData::visit(ASTFunction & function, ASTPtr & ast) +{ + if (!quantile_fuse_name_mapping.contains(function.name)) + return; + + fuse_quantile[function.name].addFuncNode(ast); +} + +void GatherFunctionQuantileData::FuseQuantileAggregatesData::addFuncNode(ASTPtr & ast) +{ + const auto * func = ast->as(); + if (!func) + return; + + const auto & arguments = func->arguments->children; + + bool need_two_args = func->name == NameQuantileDeterministic::name + || func->name == NameQuantileExactWeighted::name + || func->name == NameQuantileTimingWeighted::name + || func->name == NameQuantileTDigestWeighted::name; + if (arguments.size() != (need_two_args ? 2 : 1)) + return; + + if (arguments[0]->getColumnName().find(',') != std::string::npos) + return; + String arg_name = arguments[0]->getColumnName(); + if (need_two_args) + { + if (arguments[1]->getColumnName().find(',') != std::string::npos) + return; + arg_name += "," + arguments[1]->getColumnName(); + } + + arg_map_function[arg_name].push_back(&ast); +} + +} + diff --git a/src/Interpreters/GatherFunctionQuantileVisitor.h b/src/Interpreters/GatherFunctionQuantileVisitor.h index 21411ec89ef..188dad4731f 100644 --- a/src/Interpreters/GatherFunctionQuantileVisitor.h +++ b/src/Interpreters/GatherFunctionQuantileVisitor.h @@ -7,22 +7,6 @@ namespace DB { -/// Mapping from quantile functions for single value to plural -static const std::unordered_map quantile_fuse_name_mapping = { - {NameQuantile::name, NameQuantiles::name}, - {NameQuantileDeterministic::name, NameQuantilesDeterministic::name}, - {NameQuantileExact::name, NameQuantilesExact::name}, - {NameQuantileExactLow::name, NameQuantilesExactLow::name}, - {NameQuantileExactHigh::name, NameQuantilesExactHigh::name}, - {NameQuantileExactExclusive::name, NameQuantilesExactExclusive::name}, - {NameQuantileExactInclusive::name, NameQuantilesExactInclusive::name}, - {NameQuantileExactWeighted::name, NameQuantilesExactWeighted::name}, - {NameQuantileTiming::name, NameQuantilesTiming::name}, - {NameQuantileTimingWeighted::name, NameQuantilesTimingWeighted::name}, - {NameQuantileTDigest::name, NameQuantilesTDigest::name}, - {NameQuantileTDigestWeighted::name, NameQuantilesTDigestWeighted::name}, - {NameQuantileBFloat16::name, NameQuantilesBFloat16::name} -}; /// Gather all the `quantile*` functions class GatherFunctionQuantileData @@ -31,32 +15,18 @@ public: struct FuseQuantileAggregatesData { std::unordered_map> arg_map_function; - void addFuncNode(ASTPtr & ast) - { - const auto * func = ast->as(); - auto argument = func->arguments->children.at(0)->getColumnName(); - /// This functions needs two arguments. 
- if (func->name == NameQuantileDeterministic::name - || func->name == NameQuantileExactWeighted::name - || func->name == NameQuantileTimingWeighted::name - || func->name == NameQuantileTDigestWeighted::name) - argument = argument + "," + func->arguments->children.at(1)->getColumnName(); - - arg_map_function[argument].push_back(&ast); - } + void addFuncNode(ASTPtr & ast); }; using TypeToVisit = ASTFunction; + std::unordered_map fuse_quantile; - void visit(ASTFunction & function, ASTPtr & ast) - { - if (quantile_fuse_name_mapping.find(function.name) == quantile_fuse_name_mapping.end()) - return; + void visit(ASTFunction & function, ASTPtr & ast); + + static String getFusedName(const String & func_name); - fuse_quantile[function.name].addFuncNode(ast); - } }; using GatherFunctionQuantileVisitor = InDepthNodeVisitor, true>; diff --git a/src/Interpreters/TreeOptimizer.cpp b/src/Interpreters/TreeOptimizer.cpp index 2cee9fe356f..347b5d706cb 100644 --- a/src/Interpreters/TreeOptimizer.cpp +++ b/src/Interpreters/TreeOptimizer.cpp @@ -637,40 +637,45 @@ void optimizeFunctionsToSubcolumns(ASTPtr & query, const StorageMetadataPtr & me /// rewrite to : SELECT quantiles(0.5, 0.9, 0.95)(x)[1], quantiles(0.5, 0.9, 0.95)(x)[2], quantiles(0.5, 0.9, 0.95)(x)[3] FROM ... void fuseCandidate(std::unordered_map & fuse_quantile) { - for (const auto & candidate : fuse_quantile) + for (auto & candidate : fuse_quantile) { String func_name = candidate.first; - GatherFunctionQuantileData::FuseQuantileAggregatesData args_to_functions = candidate.second; + auto & args_to_functions = candidate.second; // Try to fuse multiply `quantile*` Function to plural for (auto it : args_to_functions.arg_map_function) { std::vector & functions = it.second; - size_t count = functions.size(); - if (count > 1) - { - auto param_exp_list = std::make_shared(); - for (auto * ast : functions) - { - const ASTs & parameters = (*ast)->as()->parameters->as().children; - if (parameters.size() > 1) - throw Exception("Aggregate function " + func_name + "require one parameter or less.", ErrorCodes::LOGICAL_ERROR); - param_exp_list->children.push_back(parameters[0]); - } - auto func_base = makeASTFunction(quantile_fuse_name_mapping.find(func_name)->second, (*functions[0])->as()->arguments->children); - func_base->parameters = param_exp_list; + if (functions.size() < 2) + continue; - size_t idx = 0; - for (auto & ast : functions) - { - auto ast_new = makeASTFunction("arrayElement", func_base, std::make_shared(UInt64(++idx))); - ast_new->setAlias((*ast)->tryGetAlias()); - *ast = ast_new; - } + const auto & common_arguments = (*functions[0])->as()->arguments->children; + auto func_base = makeASTFunction(GatherFunctionQuantileData::getFusedName(func_name)); + func_base->arguments->children = common_arguments; + func_base->parameters = std::make_shared(); + + for (const auto * ast : functions) + { + assert(ast && *ast); + const auto * func = (*ast)->as(); + assert(func && func->parameters->as()); + const ASTs & parameters = func->parameters->as().children; + if (parameters.size() != 1) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Aggregate function '{}' requires one parameter", func_name); + func_base->parameters->children.push_back(parameters[0]); + } + + for (size_t i = 0; i < functions.size(); ++i) + { + std::shared_ptr ast_new = makeASTFunction("arrayElement", func_base, std::make_shared(i + 1)); + if (const auto & alias = (*functions[i])->tryGetAlias(); !alias.empty()) + ast_new->setAlias(alias); + *functions[i] = ast_new; } } } } + void 
optimizeFuseQuantileFunctions(ASTPtr & query) { GatherFunctionQuantileVisitor::Data data{}; diff --git a/src/Interpreters/ya.make b/src/Interpreters/ya.make index ed9e28b5a11..3346972b711 100644 --- a/src/Interpreters/ya.make +++ b/src/Interpreters/ya.make @@ -63,6 +63,7 @@ SRCS( ExtractExpressionInfoVisitor.cpp FillingRow.cpp FunctionNameNormalizer.cpp + GatherFunctionQuantileVisitor.cpp HashJoin.cpp IExternalLoadable.cpp IInterpreter.cpp diff --git a/tests/queries/0_stateless/01956_fuse_quantile_optimization.sql b/tests/queries/0_stateless/01956_fuse_quantile_optimization.sql index 304aa803bcc..32225c6f4aa 100644 --- a/tests/queries/0_stateless/01956_fuse_quantile_optimization.sql +++ b/tests/queries/0_stateless/01956_fuse_quantile_optimization.sql @@ -52,7 +52,13 @@ EXPLAIN SYNTAX SELECT quantileBFloat16(0.2)(d), quantileBFloat16(0.3)(d), quanti SELECT quantileBFloat16(0.2)(d), quantileBFloat16(0.3)(d), quantileBFloat16(0.4)(d + 1) FROM datetime; EXPLAIN SYNTAX SELECT quantile(0.2)(d) as k, quantile(0.3)(d) FROM datetime order by quantile(0.2)(d); -DROP TABLE datetime; SELECT b, quantile(0.5)(x) as a, quantile(0.9)(x) as y, quantile(0.95)(x) FROM (select number as x, number % 2 as b from numbers(10)) group by b; EXPLAIN SYNTAX SELECT b, quantile(0.5)(x) as a, quantile(0.9)(x) as y, quantile(0.95)(x) FROM (select number as x, number % 2 as b from numbers(10)) group by b; + +-- fuzzer +SELECT quantileDeterministic(0.00009999999747378752)(1023) FROM datetime FORMAT Null; -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT quantileTiming(0.5)(NULL, NULL, quantileTiming(-inf)(NULL), NULL) FROM datetime FORMAT Null; -- { serverError ILLEGAL_AGGREGATION } +SELECT quantileTDigest(NULL)(NULL, quantileTDigest(3.4028234663852886e38)(NULL, d + NULL), 2.), NULL FORMAT Null; -- { serverError ILLEGAL_AGGREGATION } + +DROP TABLE datetime; From 489a92c0679b51ed2f4b6b15ae634b6f30fa59b7 Mon Sep 17 00:00:00 2001 From: kssenii Date: Wed, 15 Sep 2021 18:22:33 +0300 Subject: [PATCH 37/80] Review fixes --- src/Databases/DatabaseAtomic.cpp | 1 - src/Databases/IDatabase.h | 2 +- .../PostgreSQL/DatabaseMaterializedPostgreSQL.cpp | 15 +++++++++++---- .../PostgreSQL/DatabaseMaterializedPostgreSQL.h | 2 +- src/Interpreters/InterpreterAlterQuery.cpp | 4 ++-- .../PostgreSQL/PostgreSQLReplicationHandler.cpp | 12 ++++++------ 6 files changed, 21 insertions(+), 15 deletions(-) diff --git a/src/Databases/DatabaseAtomic.cpp b/src/Databases/DatabaseAtomic.cpp index 7b1a8c6446e..2dbcd652004 100644 --- a/src/Databases/DatabaseAtomic.cpp +++ b/src/Databases/DatabaseAtomic.cpp @@ -2,7 +2,6 @@ #include #include #include -#include #include #include #include diff --git a/src/Databases/IDatabase.h b/src/Databases/IDatabase.h index 92041b366a7..3cb1856d08d 100644 --- a/src/Databases/IDatabase.h +++ b/src/Databases/IDatabase.h @@ -282,7 +282,7 @@ public: /// Delete data and metadata stored inside the database, if exists. 
    virtual void drop(ContextPtr /*context*/) {}
 
-    virtual void applyNewSettings(const SettingsChanges &, ContextPtr)
+    virtual void applySettingsChanges(const SettingsChanges &, ContextPtr)
     {
         throw Exception(ErrorCodes::NOT_IMPLEMENTED,
                         "Database engine {} either does not support settings, or does not support altering settings",
diff --git a/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.cpp b/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.cpp
index 42720fa4eb1..cb3cda8ab79 100644
--- a/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.cpp
+++ b/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.cpp
@@ -127,9 +127,11 @@ void DatabaseMaterializedPostgreSQL::loadStoredObjects(
 }
 
 
-void DatabaseMaterializedPostgreSQL::applyNewSettings(const SettingsChanges & settings_changes, ContextPtr query_context)
+void DatabaseMaterializedPostgreSQL::applySettingsChanges(const SettingsChanges & settings_changes, ContextPtr query_context)
 {
     std::lock_guard lock(handler_mutex);
+    bool need_update_on_disk = false;
+
     for (const auto & change : settings_changes)
     {
         if (!settings->has(change.name))
@@ -140,12 +142,12 @@ void DatabaseMaterializedPostgreSQL::applyNewSettings(const SettingsChanges & se
             if (!query_context->isInternalQuery())
                 throw Exception(ErrorCodes::QUERY_NOT_ALLOWED, "Changing setting `{}` is not allowed", change.name);
 
-            DatabaseOnDisk::modifySettingsMetadata(settings_changes, query_context);
+            need_update_on_disk = true;
         }
         else if ((change.name == "materialized_postgresql_allow_automatic_update") || (change.name == "materialized_postgresql_max_block_size"))
         {
-            DatabaseOnDisk::modifySettingsMetadata(settings_changes, query_context);
             replication_handler->setSetting(change);
+            need_update_on_disk = true;
         }
         else
         {
@@ -154,6 +156,9 @@ void DatabaseMaterializedPostgreSQL::applyNewSettings(const SettingsChanges & se
 
         settings->applyChange(change);
     }
+
+    if (need_update_on_disk)
+        DatabaseOnDisk::modifySettingsMetadata(settings_changes, query_context);
 }
 
 
@@ -185,6 +190,8 @@ StoragePtr DatabaseMaterializedPostgreSQL::tryGetTable(const String & name, Cont
 
 
 /// `except` is not empty in case it is detach and it will contain only one table name - name of detached table.
+/// If there is a user-defined setting `materialized_postgresql_tables_list`, the list of tables is always taken from there.
+/// Otherwise we traverse the materialized storages to find out the list.
 String DatabaseMaterializedPostgreSQL::getFormattedTablesList(const String & except) const
 {
     String tables_list;
@@ -334,7 +341,7 @@ StoragePtr DatabaseMaterializedPostgreSQL::detachTable(const String & table_name
         auto nested = table_to_delete->as()->getNested();
         if (!nested)
-            throw Exception(ErrorCodes::UNKNOWN_TABLE, "Inner table `{}` does not exist", table_name);
+            throw Exception(ErrorCodes::LOGICAL_ERROR, "Inner table `{}` does not exist", table_name);
 
         std::lock_guard lock(handler_mutex);
         replication_handler->removeTableFromReplication(table_name);
diff --git a/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.h b/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.h
index effd0ec653a..a0f9b3fce7a 100644
--- a/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.h
+++ b/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.h
@@ -61,7 +61,7 @@ public:
 
     void stopReplication();
 
-    void applyNewSettings(const SettingsChanges & settings_changes, ContextPtr query_context) override;
+    void applySettingsChanges(const SettingsChanges & settings_changes, ContextPtr query_context) override;
 
     void shutdown() override;
 
diff --git a/src/Interpreters/InterpreterAlterQuery.cpp b/src/Interpreters/InterpreterAlterQuery.cpp
index d0054439c56..6595e1c02be 100644
--- a/src/Interpreters/InterpreterAlterQuery.cpp
+++ b/src/Interpreters/InterpreterAlterQuery.cpp
@@ -189,7 +189,7 @@ BlockIO InterpreterAlterQuery::executeToDatabase(const ASTAlterQuery & alter)
     for (const auto & command : alter_commands)
     {
         if (command.type != AlterCommand::MODIFY_DATABASE_SETTING)
-            throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Alter of type '{}' is not supported by databases", alterTypeToString(command.type));
+            throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unsupported alter type for database engines");
     }
 
     for (const auto & command : alter_commands)
@@ -197,7 +197,7 @@ BlockIO InterpreterAlterQuery::executeToDatabase(const ASTAlterQuery & alter)
         if (!command.ignore)
         {
             if (command.type == AlterCommand::MODIFY_DATABASE_SETTING)
-                database->applyNewSettings(command.settings_changes, getContext());
+                database->applySettingsChanges(command.settings_changes, getContext());
             else
                 throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unsupported alter command");
         }
diff --git a/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp b/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp
index 23ee7532b5e..456ca2c514e 100644
--- a/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp
+++ b/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp
@@ -119,6 +119,10 @@ void PostgreSQLReplicationHandler::startSynchronization(bool throw_on_error)
     /// Data older than this is not available anymore.
     String snapshot_name, start_lsn;
 
+    /// Also let's have a separate non-replication connection, because we need two parallel transactions and
+    /// one connection can have one transaction at a time.
+    auto tmp_connection = std::make_shared(connection_info);
+
     auto initial_sync = [&]()
     {
         LOG_TRACE(log, "Starting tables sync load");
@@ -136,15 +140,11 @@ void PostgreSQLReplicationHandler::startSynchronization(bool throw_on_error)
             createReplicationSlot(tx, start_lsn, snapshot_name);
         }
 
-        /// Loading tables from snapshot requires a certain transaction type, so we need to open a new transactin.
-        /// But we cannot have more than one open transaciton on the same connection. Therefore we have
-        /// a separate connection to load tables.
- postgres::Connection tmp_connection(connection_info); for (const auto & [table_name, storage] : materialized_storages) { try { - nested_storages[table_name] = loadFromSnapshot(tmp_connection, snapshot_name, table_name, storage->as ()); + nested_storages[table_name] = loadFromSnapshot(*tmp_connection, snapshot_name, table_name, storage->as()); } catch (Exception & e) { @@ -211,7 +211,7 @@ void PostgreSQLReplicationHandler::startSynchronization(bool throw_on_error) /// Handler uses it only for loadFromSnapshot and shutdown methods. consumer = std::make_shared( context, - std::make_shared(connection_info), + std::move(tmp_connection), replication_slot, publication_name, start_lsn, From d0c3b87d84231c825caaca6360cf92ca5277224e Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Thu, 16 Sep 2021 00:17:20 +0300 Subject: [PATCH 38/80] improvements --- src/AggregateFunctions/ya.make | 1 + src/Common/ya.make | 1 - src/Compression/ya.make | 5 - ...rConstantBase.h => FunctionConstantBase.h} | 27 ++-- src/Functions/FunctionMathConstFloat64.h | 36 ----- src/Functions/buildId.cpp | 38 ----- src/Functions/e.cpp | 24 --- src/Functions/hostName.cpp | 27 ---- src/Functions/mathConstants.cpp | 47 ++++++ src/Functions/pi.cpp | 24 --- src/Functions/serverConstants.cpp | 144 ++++++++++++++++++ src/Functions/serverUUID.cpp | 27 ---- src/Functions/tcpPort.cpp | 25 --- src/Functions/timezone.cpp | 28 ---- src/Functions/uptime.cpp | 26 ---- src/Functions/version.cpp | 30 ---- src/Functions/ya.make | 11 +- src/Functions/zookeeperSessionUptime.cpp | 25 --- src/IO/ya.make | 1 - src/Interpreters/ya.make | 1 + src/Storages/ya.make | 2 +- 21 files changed, 213 insertions(+), 337 deletions(-) rename src/Functions/{FunctionServerConstantBase.h => FunctionConstantBase.h} (55%) delete mode 100644 src/Functions/FunctionMathConstFloat64.h delete mode 100644 src/Functions/buildId.cpp delete mode 100644 src/Functions/e.cpp delete mode 100644 src/Functions/hostName.cpp create mode 100644 src/Functions/mathConstants.cpp delete mode 100644 src/Functions/pi.cpp create mode 100644 src/Functions/serverConstants.cpp delete mode 100644 src/Functions/serverUUID.cpp delete mode 100644 src/Functions/tcpPort.cpp delete mode 100644 src/Functions/timezone.cpp delete mode 100644 src/Functions/uptime.cpp delete mode 100644 src/Functions/version.cpp delete mode 100644 src/Functions/zookeeperSessionUptime.cpp diff --git a/src/AggregateFunctions/ya.make b/src/AggregateFunctions/ya.make index f0374fb69bc..e18ec35c540 100644 --- a/src/AggregateFunctions/ya.make +++ b/src/AggregateFunctions/ya.make @@ -49,6 +49,7 @@ SRCS( AggregateFunctionSimpleLinearRegression.cpp AggregateFunctionSimpleState.cpp AggregateFunctionSingleValueOrNull.cpp + AggregateFunctionSparkbar.cpp AggregateFunctionState.cpp AggregateFunctionStatistics.cpp AggregateFunctionStatisticsSimple.cpp diff --git a/src/Common/ya.make b/src/Common/ya.make index bf9d3627661..e130fc7ebea 100644 --- a/src/Common/ya.make +++ b/src/Common/ya.make @@ -43,7 +43,6 @@ SRCS( Epoll.cpp ErrorCodes.cpp Exception.cpp - ExternalLoaderStatus.cpp FieldVisitorDump.cpp FieldVisitorHash.cpp FieldVisitorSum.cpp diff --git a/src/Compression/ya.make b/src/Compression/ya.make index 420ada412e0..8ba48696225 100644 --- a/src/Compression/ya.make +++ b/src/Compression/ya.make @@ -35,11 +35,6 @@ SRCS( CompressionFactoryAdditions.cpp ICompressionCodec.cpp LZ4_decompress_faster.cpp - fuzzers/compressed_buffer_fuzzer.cpp - fuzzers/delta_decompress_fuzzer.cpp - fuzzers/double_delta_decompress_fuzzer.cpp - 
fuzzers/encrypted_decompress_fuzzer.cpp - fuzzers/lz4_decompress_fuzzer.cpp getCompressionCodecForFile.cpp ) diff --git a/src/Functions/FunctionServerConstantBase.h b/src/Functions/FunctionConstantBase.h similarity index 55% rename from src/Functions/FunctionServerConstantBase.h rename to src/Functions/FunctionConstantBase.h index 2dbc427dcf7..35096a9942f 100644 --- a/src/Functions/FunctionServerConstantBase.h +++ b/src/Functions/FunctionConstantBase.h @@ -7,22 +7,29 @@ namespace DB { -/// Base class for functions which return server-level constant like version() or uptime() -template -class FunctionServerConstantBase : public IFunction +/// Base class for constant functions +template +class FunctionConstantBase : public IFunction { public: - static constexpr auto name = func_name; - explicit FunctionServerConstantBase(ContextPtr context, T && value_) + /// For server-level constants (uptime(), version(), etc) + explicit FunctionConstantBase(ContextPtr context, T && constant_value_) : is_distributed(context->isDistributed()) - , value(std::forward(value_)) + , constant_value(std::forward(constant_value_)) + { + } + + /// For real constants (pi(), e(), etc) + explicit FunctionConstantBase(const T & constant_value_) + : is_distributed(false) + , constant_value(constant_value_) { } String getName() const override { - return name; + return Derived::name; } size_t getNumberOfArguments() const override @@ -38,19 +45,19 @@ public: bool isDeterministic() const override { return false; } bool isDeterministicInScopeOfQuery() const override { return true; } - /// Function may return different values on different shareds/replicas, so it's not constant for distributed query + /// Some functions may return different values on different shards/replicas, so it's not constant for distributed query bool isSuitableForConstantFolding() const override { return !is_distributed; } bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } ColumnPtr executeImpl(const ColumnsWithTypeAndName &, const DataTypePtr &, size_t input_rows_count) const override { - return ColumnT().createColumnConst(input_rows_count, value); + return ColumnT().createColumnConst(input_rows_count, constant_value); } private: bool is_distributed; - T value; + const T constant_value; }; } diff --git a/src/Functions/FunctionMathConstFloat64.h b/src/Functions/FunctionMathConstFloat64.h deleted file mode 100644 index 1d866b3dcd8..00000000000 --- a/src/Functions/FunctionMathConstFloat64.h +++ /dev/null @@ -1,36 +0,0 @@ -#pragma once - -#include -#include -#include - - -namespace DB -{ - -template -class FunctionMathConstFloat64 : public IFunction -{ -public: - static constexpr auto name = Impl::name; - static FunctionPtr create(ContextPtr) { return std::make_shared(); } - -private: - String getName() const override { return name; } - - size_t getNumberOfArguments() const override { return 0; } - - bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } - - DataTypePtr getReturnTypeImpl(const DataTypes & /*arguments*/) const override - { - return std::make_shared(); - } - - ColumnPtr executeImpl(const ColumnsWithTypeAndName &, const DataTypePtr & result_type, size_t input_rows_count) const override - { - return result_type->createColumnConst(input_rows_count, Impl::value); - } -}; - -} diff --git a/src/Functions/buildId.cpp b/src/Functions/buildId.cpp deleted file mode 100644 index 40223cf0add..00000000000 
--- a/src/Functions/buildId.cpp +++ /dev/null @@ -1,38 +0,0 @@ -#if defined(__ELF__) && !defined(__FreeBSD__) - -#include -#include -#include - -namespace DB -{ -namespace -{ - constexpr char name[] = "buildId"; - - /// buildId() - returns the compiler build id of the running binary. - class FunctionBuildId : public FunctionServerConstantBase - { - public: - static FunctionPtr create(ContextPtr context) { return std::make_shared(context); } - - explicit FunctionBuildId(ContextPtr context) : FunctionServerConstantBase(context, SymbolIndex::instance()->getBuildIDHex()) {} - }; -} - -void registerFunctionBuildId(FunctionFactory & factory) -{ - factory.registerFunction(); -} - -} - -#else - -namespace DB -{ -class FunctionFactory; -void registerFunctionBuildId(FunctionFactory &) {} -} - -#endif diff --git a/src/Functions/e.cpp b/src/Functions/e.cpp deleted file mode 100644 index c43bb7d572a..00000000000 --- a/src/Functions/e.cpp +++ /dev/null @@ -1,24 +0,0 @@ -#include -#include - -namespace DB -{ -namespace -{ - -struct EImpl -{ - static constexpr auto name = "e"; - static constexpr double value = 2.7182818284590452353602874713526624977572470; -}; - -using FunctionE = FunctionMathConstFloat64; - -} - -void registerFunctionE(FunctionFactory & factory) -{ - factory.registerFunction(); -} - -} diff --git a/src/Functions/hostName.cpp b/src/Functions/hostName.cpp deleted file mode 100644 index 564f68a911d..00000000000 --- a/src/Functions/hostName.cpp +++ /dev/null @@ -1,27 +0,0 @@ -#include -#include -#include - -namespace DB -{ -namespace -{ - constexpr char name[] = "hostName"; - - /// Get the host name. Is is constant on single server, but is not constant in distributed queries. - class FunctionHostName : public FunctionServerConstantBase - { - public: - static FunctionPtr create(ContextPtr context) { return std::make_shared(context); } - - explicit FunctionHostName(ContextPtr context) : FunctionServerConstantBase(context, DNSResolver::instance().getHostName()) {} - }; -} - -void registerFunctionHostName(FunctionFactory & factory) -{ - factory.registerFunction(); - factory.registerAlias("hostname", "hostName"); -} - -} diff --git a/src/Functions/mathConstants.cpp b/src/Functions/mathConstants.cpp new file mode 100644 index 00000000000..ecc2f8c48b5 --- /dev/null +++ b/src/Functions/mathConstants.cpp @@ -0,0 +1,47 @@ +#include +#include + +namespace DB +{ + +namespace +{ + template + class FunctionMathConstFloat64 : public FunctionConstantBase, Float64, DataTypeFloat64> + { + public: + static constexpr auto name = Impl::name; + static FunctionPtr create(ContextPtr) { return std::make_shared(); } + FunctionMathConstFloat64() : FunctionConstantBase, Float64, DataTypeFloat64>(Impl::value) {} + }; + + + struct EImpl + { + static constexpr char name[] = "e"; + static constexpr double value = 2.7182818284590452353602874713526624977572470; + }; + + using FunctionE = FunctionMathConstFloat64; + + + struct PiImpl + { + static constexpr char name[] = "pi"; + static constexpr double value = 3.1415926535897932384626433832795028841971693; + }; + + using FunctionPi = FunctionMathConstFloat64; +} + +void registerFunctionE(FunctionFactory & factory) +{ + factory.registerFunction(); +} + +void registerFunctionPi(FunctionFactory & factory) +{ + factory.registerFunction(FunctionFactory::CaseInsensitive); +} + +} diff --git a/src/Functions/pi.cpp b/src/Functions/pi.cpp deleted file mode 100644 index efa536c7314..00000000000 --- a/src/Functions/pi.cpp +++ /dev/null @@ -1,24 +0,0 @@ -#include -#include - -namespace DB 
-{
-namespace
-{
-
-struct PiImpl
-{
-    static constexpr auto name = "pi";
-    static constexpr double value = 3.1415926535897932384626433832795028841971693;
-};
-
-using FunctionPi = FunctionMathConstFloat64;
-
-}
-
-void registerFunctionPi(FunctionFactory & factory)
-{
-    factory.registerFunction(FunctionFactory::CaseInsensitive);
-}
-
-}
diff --git a/src/Functions/serverConstants.cpp b/src/Functions/serverConstants.cpp
new file mode 100644
index 00000000000..6282553e35c
--- /dev/null
+++ b/src/Functions/serverConstants.cpp
@@ -0,0 +1,144 @@
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#if !defined(ARCADIA_BUILD)
+# include
+#endif
+
+
+namespace DB
+{
+namespace
+{
+
+#if defined(__ELF__) && !defined(__FreeBSD__)
+    /// buildId() - returns the compiler build id of the running binary.
+    class FunctionBuildId : public FunctionConstantBase
+    {
+    public:
+        static constexpr auto name = "buildId";
+        static FunctionPtr create(ContextPtr context) { return std::make_shared(context); }
+        explicit FunctionBuildId(ContextPtr context) : FunctionConstantBase(context, SymbolIndex::instance()->getBuildIDHex()) {}
+    };
+#endif
+
+
+    /// Get the host name. It is constant on a single server, but is not constant in distributed queries.
+    class FunctionHostName : public FunctionConstantBase
+    {
+    public:
+        static constexpr auto name = "hostName";
+        static FunctionPtr create(ContextPtr context) { return std::make_shared(context); }
+        explicit FunctionHostName(ContextPtr context) : FunctionConstantBase(context, DNSResolver::instance().getHostName()) {}
+    };
+
+
+    class FunctionServerUUID : public FunctionConstantBase
+    {
+    public:
+        static constexpr auto name = "serverUUID";
+        static FunctionPtr create(ContextPtr context) { return std::make_shared(context); }
+        explicit FunctionServerUUID(ContextPtr context) : FunctionConstantBase(context, ServerUUID::get()) {}
+    };
+
+
+    class FunctionTcpPort : public FunctionConstantBase
+    {
+    public:
+        static constexpr auto name = "tcpPort";
+        static FunctionPtr create(ContextPtr context) { return std::make_shared(context); }
+        explicit FunctionTcpPort(ContextPtr context) : FunctionConstantBase(context, context->getTCPPort()) {}
+    };
+
+
+    /// Returns the server time zone.
+    class FunctionTimezone : public FunctionConstantBase
+    {
+    public:
+        static constexpr auto name = "timezone";
+        static FunctionPtr create(ContextPtr context) { return std::make_shared(context); }
+        explicit FunctionTimezone(ContextPtr context) : FunctionConstantBase(context, String{DateLUT::instance().getTimeZone()}) {}
+    };
+
+
+    /// Returns server uptime in seconds.
+    class FunctionUptime : public FunctionConstantBase
+    {
+    public:
+        static constexpr auto name = "uptime";
+        static FunctionPtr create(ContextPtr context) { return std::make_shared(context); }
+        explicit FunctionUptime(ContextPtr context) : FunctionConstantBase(context, context->getUptimeSeconds()) {}
+    };
+
+
+    /// version() - returns the current version as a string.
+ class FunctionVersion : public FunctionConstantBase + { + public: + static constexpr auto name = "version"; + static FunctionPtr create(ContextPtr context) { return std::make_shared(context); } + explicit FunctionVersion(ContextPtr context) : FunctionConstantBase(context, VERSION_STRING) {} + }; + + class FunctionZooKeeperSessionUptime : public FunctionConstantBase + { + public: + static constexpr auto name = "zookeeperSessionUptime"; + explicit FunctionZooKeeperSessionUptime(ContextPtr context) : FunctionConstantBase(context, context->getZooKeeperSessionUptime()) {} + static FunctionPtr create(ContextPtr context) { return std::make_shared(context); } + }; +} + + +void registerFunctionBuildId(FunctionFactory & factory) +{ +#if defined(__ELF__) && !defined(__FreeBSD__) + factory.registerFunction(); +#endif +} + +void registerFunctionHostName(FunctionFactory & factory) +{ + factory.registerFunction(); + factory.registerAlias("hostname", "hostName"); +} + +void registerFunctionServerUUID(FunctionFactory & factory) +{ + factory.registerFunction(); +} + +void registerFunctionTcpPort(FunctionFactory & factory) +{ + factory.registerFunction(); +} + +void registerFunctionTimezone(FunctionFactory & factory) +{ + factory.registerFunction(); + factory.registerAlias("timeZone", "timezone"); +} + +void registerFunctionUptime(FunctionFactory & factory) +{ + factory.registerFunction(); +} + +void registerFunctionVersion(FunctionFactory & factory) +{ + factory.registerFunction(FunctionFactory::CaseInsensitive); +} + +void registerFunctionZooKeeperSessionUptime(FunctionFactory & factory) +{ + factory.registerFunction(); +} + +} + diff --git a/src/Functions/serverUUID.cpp b/src/Functions/serverUUID.cpp deleted file mode 100644 index 4d353a81df2..00000000000 --- a/src/Functions/serverUUID.cpp +++ /dev/null @@ -1,27 +0,0 @@ -#include -#include -#include - -namespace DB -{ - -namespace -{ - constexpr char name[] = "serverUUID"; - - class FunctionServerUUID : public FunctionServerConstantBase - { - public: - static FunctionPtr create(ContextPtr context) { return std::make_shared(context); } - - explicit FunctionServerUUID(ContextPtr context) : FunctionServerConstantBase(context, ServerUUID::get()) {} - }; -} - -void registerFunctionServerUUID(FunctionFactory & factory) -{ - factory.registerFunction(); -} - -} - diff --git a/src/Functions/tcpPort.cpp b/src/Functions/tcpPort.cpp deleted file mode 100644 index b2e46d929cf..00000000000 --- a/src/Functions/tcpPort.cpp +++ /dev/null @@ -1,25 +0,0 @@ -#include -#include - -namespace DB -{ - -namespace -{ - constexpr char name[] = "tcpPort"; - - class FunctionTcpPort : public FunctionServerConstantBase - { - public: - static FunctionPtr create(ContextPtr context) { return std::make_shared(context); } - - explicit FunctionTcpPort(ContextPtr context) : FunctionServerConstantBase(context, context->getTCPPort()) {} - }; -} - -void registerFunctionTcpPort(FunctionFactory & factory) -{ - factory.registerFunction(); -} - -} diff --git a/src/Functions/timezone.cpp b/src/Functions/timezone.cpp deleted file mode 100644 index 9694d22c8b8..00000000000 --- a/src/Functions/timezone.cpp +++ /dev/null @@ -1,28 +0,0 @@ -#include -#include -#include - - -namespace DB -{ -namespace -{ - constexpr char name[] = "timezone"; - - /// Returns the server time zone. 
- class FunctionTimezone : public FunctionServerConstantBase - { - public: - static FunctionPtr create(ContextPtr context) { return std::make_shared(context); } - - explicit FunctionTimezone(ContextPtr context) : FunctionServerConstantBase(context, String{DateLUT::instance().getTimeZone()}) {} - }; -} - -void registerFunctionTimezone(FunctionFactory & factory) -{ - factory.registerFunction(); - factory.registerAlias("timeZone", "timezone"); -} - -} diff --git a/src/Functions/uptime.cpp b/src/Functions/uptime.cpp deleted file mode 100644 index 9c2a9d35762..00000000000 --- a/src/Functions/uptime.cpp +++ /dev/null @@ -1,26 +0,0 @@ -#include -#include - -namespace DB -{ - -namespace -{ - constexpr char name[] = "uptime"; - - /// Returns server uptime in seconds. - class FunctionUptime : public FunctionServerConstantBase - { - public: - static FunctionPtr create(ContextPtr context) { return std::make_shared(context); } - - explicit FunctionUptime(ContextPtr context) : FunctionServerConstantBase(context, context->getUptimeSeconds()) {} - }; -} - -void registerFunctionUptime(FunctionFactory & factory) -{ - factory.registerFunction(); -} - -} diff --git a/src/Functions/version.cpp b/src/Functions/version.cpp deleted file mode 100644 index d59774bd23d..00000000000 --- a/src/Functions/version.cpp +++ /dev/null @@ -1,30 +0,0 @@ -#include -#include - -#if !defined(ARCADIA_BUILD) -# include -#endif - -namespace DB -{ - -namespace -{ - constexpr char name[] = "version"; - - /// version() - returns the current version as a string. - class FunctionVersion : public FunctionServerConstantBase - { - public: - static FunctionPtr create(ContextPtr context) { return std::make_shared(context); } - - explicit FunctionVersion(ContextPtr context) : FunctionServerConstantBase(context, VERSION_STRING) {} - }; -} - -void registerFunctionVersion(FunctionFactory & factory) -{ - factory.registerFunction(FunctionFactory::CaseInsensitive); -} - -} diff --git a/src/Functions/ya.make b/src/Functions/ya.make index fbfff751314..877e342f036 100644 --- a/src/Functions/ya.make +++ b/src/Functions/ya.make @@ -218,7 +218,6 @@ SRCS( blockNumber.cpp blockSerializedSize.cpp blockSize.cpp - buildId.cpp byteSize.cpp caseWithExpression.cpp cbrt.cpp @@ -249,7 +248,6 @@ SRCS( divide/divide.cpp divide/divideImpl.cpp dumpColumnStructure.cpp - e.cpp empty.cpp encodeXMLComponent.cpp encrypt.cpp @@ -314,7 +312,6 @@ SRCS( hasThreadFuzzer.cpp hasToken.cpp hasTokenCaseInsensitive.cpp - hostName.cpp hyperscanRegexpChecker.cpp hypot.cpp identity.cpp @@ -362,6 +359,7 @@ SRCS( map.cpp match.cpp materialize.cpp + mathConstants.cpp minus.cpp modulo.cpp moduloOrZero.cpp @@ -402,7 +400,6 @@ SRCS( nullIf.cpp padString.cpp partitionId.cpp - pi.cpp plus.cpp pointInEllipses.cpp pointInPolygon.cpp @@ -479,7 +476,7 @@ SRCS( s2RectIntersection.cpp s2RectUnion.cpp s2ToGeo.cpp - serverUUID.cpp + serverConstants.cpp sigmoid.cpp sign.cpp sin.cpp @@ -505,13 +502,11 @@ SRCS( synonyms.cpp tan.cpp tanh.cpp - tcpPort.cpp tgamma.cpp throwIf.cpp tid.cpp timeSlot.cpp timeSlots.cpp - timezone.cpp timezoneOf.cpp timezoneOffset.cpp toColumnTypeName.cpp @@ -574,9 +569,7 @@ SRCS( tupleToNameValuePairs.cpp upper.cpp upperUTF8.cpp - uptime.cpp validateNestedArraySizes.cpp - version.cpp visibleWidth.cpp visitParamExtractBool.cpp visitParamExtractFloat.cpp diff --git a/src/Functions/zookeeperSessionUptime.cpp b/src/Functions/zookeeperSessionUptime.cpp deleted file mode 100644 index b5163b7b22a..00000000000 --- a/src/Functions/zookeeperSessionUptime.cpp +++ /dev/null @@ -1,25 +0,0 @@ 
-#include -#include - - -namespace DB -{ -namespace -{ - constexpr char name[] = "zookeeperSessionUptime"; - - class FunctionZooKeeperSessionUptime : public FunctionServerConstantBase - { - public: - FunctionZooKeeperSessionUptime(ContextPtr context) : FunctionServerConstantBase(context, context->getZooKeeperSessionUptime()) {} - - static FunctionPtr create(ContextPtr context) { return std::make_shared(context); } - }; -} - -void registerFunctionZooKeeperSessionUptime(FunctionFactory & factory) -{ - factory.registerFunction(); -} - -} diff --git a/src/IO/ya.make b/src/IO/ya.make index cc24a690308..6857a0ccdf4 100644 --- a/src/IO/ya.make +++ b/src/IO/ya.make @@ -61,7 +61,6 @@ SRCS( ReadBufferFromMemory.cpp ReadBufferFromPocoSocket.cpp ReadHelpers.cpp - ReadSettings.cpp SeekAvoidingReadBuffer.cpp SynchronousReader.cpp ThreadPoolReader.cpp diff --git a/src/Interpreters/ya.make b/src/Interpreters/ya.make index 0bc7cb11cf0..ed9e28b5a11 100644 --- a/src/Interpreters/ya.make +++ b/src/Interpreters/ya.make @@ -48,6 +48,7 @@ SRCS( DatabaseAndTableWithAlias.cpp DatabaseCatalog.cpp DictionaryReader.cpp + DuplicateOrderByVisitor.cpp EmbeddedDictionaries.cpp ExecuteScalarSubqueriesVisitor.cpp ExpressionActions.cpp diff --git a/src/Storages/ya.make b/src/Storages/ya.make index c85a0dbd902..2821f0d3791 100644 --- a/src/Storages/ya.make +++ b/src/Storages/ya.make @@ -18,7 +18,7 @@ SRCS( Distributed/DirectoryMonitor.cpp Distributed/DistributedSettings.cpp Distributed/DistributedSink.cpp - ExecutablePoolSettings.cpp + ExecutableSettings.cpp IStorage.cpp IndicesDescription.cpp JoinSettings.cpp From 23f54658a61d21c700727f23d289eac7f806b231 Mon Sep 17 00:00:00 2001 From: tavplubix Date: Thu, 16 Sep 2021 12:15:22 +0300 Subject: [PATCH 39/80] Update serverConstants.cpp --- src/Functions/serverConstants.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Functions/serverConstants.cpp b/src/Functions/serverConstants.cpp index 6282553e35c..6808e6607cf 100644 --- a/src/Functions/serverConstants.cpp +++ b/src/Functions/serverConstants.cpp @@ -96,7 +96,7 @@ namespace } -void registerFunctionBuildId(FunctionFactory & factory) +void registerFunctionBuildId([[maybe_unused]] FunctionFactory & factory) { #if defined(__ELF__) && !defined(__FreeBSD__) factory.registerFunction(); From 661a54827b4fa83ed491580f8592fb714ea54887 Mon Sep 17 00:00:00 2001 From: vdimir Date: Thu, 16 Sep 2021 15:11:14 +0300 Subject: [PATCH 40/80] Do not throw LOGICAL_ERROR from fuse quantile for illegal params --- .../GatherFunctionQuantileVisitor.cpp | 5 ++ src/Interpreters/TreeOptimizer.cpp | 58 ++++++++++--------- .../01956_fuse_quantile_optimization.sql | 6 +- 3 files changed, 39 insertions(+), 30 deletions(-) diff --git a/src/Interpreters/GatherFunctionQuantileVisitor.cpp b/src/Interpreters/GatherFunctionQuantileVisitor.cpp index 9e34d06eda9..74a2312b769 100644 --- a/src/Interpreters/GatherFunctionQuantileVisitor.cpp +++ b/src/Interpreters/GatherFunctionQuantileVisitor.cpp @@ -6,6 +6,11 @@ namespace DB { +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + /// Mapping from quantile functions for single value to plural static const std::unordered_map quantile_fuse_name_mapping = { {NameQuantile::name, NameQuantiles::name}, diff --git a/src/Interpreters/TreeOptimizer.cpp b/src/Interpreters/TreeOptimizer.cpp index 347b5d706cb..bf0962388c3 100644 --- a/src/Interpreters/TreeOptimizer.cpp +++ b/src/Interpreters/TreeOptimizer.cpp @@ -632,39 +632,48 @@ void optimizeFunctionsToSubcolumns(ASTPtr & query, const 
StorageMetadataPtr & me RewriteFunctionToSubcolumnVisitor(data).visit(query); } +std::shared_ptr getQuantileFuseCandidate(const String & func_name, std::vector & functions) +{ + if (functions.size() < 2) + return nullptr; + + const auto & common_arguments = (*functions[0])->as()->arguments->children; + auto func_base = makeASTFunction(GatherFunctionQuantileData::getFusedName(func_name)); + func_base->arguments->children = common_arguments; + func_base->parameters = std::make_shared(); + + for (const auto * ast : functions) + { + assert(ast && *ast); + const auto * func = (*ast)->as(); + assert(func && func->parameters->as()); + const ASTs & parameters = func->parameters->as().children; + if (parameters.size() != 1) + return nullptr; /// query is illegal, give up + func_base->parameters->children.push_back(parameters[0]); + } + return func_base; +} + /// Rewrites multi quantile()() functions with the same arguments to quantiles()()[] /// eg:SELECT quantile(0.5)(x), quantile(0.9)(x), quantile(0.95)(x) FROM... /// rewrite to : SELECT quantiles(0.5, 0.9, 0.95)(x)[1], quantiles(0.5, 0.9, 0.95)(x)[2], quantiles(0.5, 0.9, 0.95)(x)[3] FROM ... -void fuseCandidate(std::unordered_map & fuse_quantile) +void optimizeFuseQuantileFunctions(ASTPtr & query) { - for (auto & candidate : fuse_quantile) + GatherFunctionQuantileVisitor::Data data{}; + GatherFunctionQuantileVisitor(data).visit(query); + for (auto & candidate : data.fuse_quantile) { String func_name = candidate.first; auto & args_to_functions = candidate.second; - // Try to fuse multiply `quantile*` Function to plural + /// Try to fuse multiply `quantile*` Function to plural for (auto it : args_to_functions.arg_map_function) { std::vector & functions = it.second; - if (functions.size() < 2) + auto func_base = getQuantileFuseCandidate(func_name, functions); + if (!func_base) continue; - - const auto & common_arguments = (*functions[0])->as()->arguments->children; - auto func_base = makeASTFunction(GatherFunctionQuantileData::getFusedName(func_name)); - func_base->arguments->children = common_arguments; - func_base->parameters = std::make_shared(); - - for (const auto * ast : functions) - { - assert(ast && *ast); - const auto * func = (*ast)->as(); - assert(func && func->parameters->as()); - const ASTs & parameters = func->parameters->as().children; - if (parameters.size() != 1) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Aggregate function '{}' requires one parameter", func_name); - func_base->parameters->children.push_back(parameters[0]); - } - for (size_t i = 0; i < functions.size(); ++i) { std::shared_ptr ast_new = makeASTFunction("arrayElement", func_base, std::make_shared(i + 1)); @@ -676,13 +685,6 @@ void fuseCandidate(std::unordered_map Date: Thu, 16 Sep 2021 20:18:34 +0300 Subject: [PATCH 41/80] minor enhancements in async inserts --- programs/server/Server.cpp | 2 +- src/Core/Settings.h | 16 ++++++------- src/Formats/FormatFactory.cpp | 12 ++++++++++ src/Formats/FormatFactory.h | 3 +++ src/Interpreters/AsynchronousInsertQueue.cpp | 7 +++++- src/Interpreters/Context.cpp | 4 ++-- src/Interpreters/executeQuery.cpp | 2 +- .../0_stateless/02015_async_inserts_1.sh | 2 +- .../0_stateless/02015_async_inserts_2.sh | 2 +- .../0_stateless/02015_async_inserts_3.sh | 2 +- .../0_stateless/02015_async_inserts_4.sh | 2 +- .../0_stateless/02015_async_inserts_5.sh | 2 +- .../02015_async_inserts_6.reference | 4 ++++ .../0_stateless/02015_async_inserts_6.sh | 24 +++++++++++++++++++ .../02015_async_inserts_stress_long.sh | 4 ++-- 15 files changed, 68 
insertions(+), 20 deletions(-) create mode 100644 tests/queries/0_stateless/02015_async_inserts_6.reference create mode 100755 tests/queries/0_stateless/02015_async_inserts_6.sh diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 5ca9007bdcc..f243ecda108 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -918,7 +918,7 @@ if (ThreadFuzzer::instance().isEffective()) global_context, settings.async_insert_threads, settings.async_insert_max_data_size, - AsynchronousInsertQueue::Timeout{.busy = settings.async_insert_busy_timeout, .stale = settings.async_insert_stale_timeout})); + AsynchronousInsertQueue::Timeout{.busy = settings.async_insert_busy_timeout_ms, .stale = settings.async_insert_stale_timeout_ms})); /// Size of cache for marks (index of MergeTree family of tables). It is mandatory. size_t mark_cache_size = config().getUInt64("mark_cache_size"); diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 0e29168f906..a883f3915e4 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -507,6 +507,14 @@ class IColumn; M(Bool, remote_filesystem_read_prefetch, true, "Should use prefetching when reading data from remote filesystem.", 0) \ M(Int64, read_priority, 0, "Priority to read data from local filesystem. Only supported for 'pread_threadpool' method.", 0) \ \ + M(UInt64, async_insert_threads, 16, "Maximum number of threads to actually parse and insert data in background. Zero means asynchronous mode is disabled", 0) \ + M(Bool, async_insert, false, "If true, data from INSERT query is stored in queue and later flushed to table in background. Makes sense only for inserts via HTTP protocol. If wait_for_async_insert is false, INSERT query is processed almost instantly, otherwise client will wait until data will be flushed to table", 0) \ + M(Bool, wait_for_async_insert, true, "If true wait for processing of asynchronous insertion", 0) \ + M(Seconds, wait_for_async_insert_timeout, DBMS_DEFAULT_LOCK_ACQUIRE_TIMEOUT_SEC, "Timeout for waiting for processing asynchronous insertion", 0) \ + M(UInt64, async_insert_max_data_size, 100000, "Maximum size in bytes of unparsed data collected per query before being inserted", 0) \ + M(Milliseconds, async_insert_busy_timeout_ms, 200, "Maximum time to wait before dumping collected data per query since the first data appeared", 0) \ + M(Milliseconds, async_insert_stale_timeout_ms, 0, "Maximum time to wait before dumping collected data per query since the last data appeared. Zero means no timeout at all", 0) \ + \ /** Experimental functions */ \ M(Bool, allow_experimental_funnel_functions, false, "Enable experimental functions for funnel analysis.", 0) \ M(Bool, allow_experimental_nlp_functions, false, "Enable experimental functions for natural language processing.", 0) \ @@ -602,14 +610,6 @@ class IColumn; M(Bool, output_format_pretty_row_numbers, false, "Add row numbers before each row for pretty output format", 0) \ M(Bool, insert_distributed_one_random_shard, false, "If setting is enabled, inserting into distributed table will choose a random shard to write when there is no sharding key", 0) \ \ - M(UInt64, async_insert_threads, 16, "Maximum number of threads to actually parse and insert data in background. 
Zero means asynchronous mode is disabled", 0) \ - M(Bool, async_insert_mode, false, "Insert query is processed almost instantly, but an actual data queued for later asynchronous insertion", 0) \ - M(Bool, wait_for_async_insert, true, "If true wait for processing of asynchronous insertion", 0) \ - M(Seconds, wait_for_async_insert_timeout, DBMS_DEFAULT_LOCK_ACQUIRE_TIMEOUT_SEC, "Timeout for waiting for processing asynchronous insertion", 0) \ - M(UInt64, async_insert_max_data_size, 1000000, "Maximum size in bytes of unparsed data collected per query before being inserted", 0) \ - M(Milliseconds, async_insert_busy_timeout, 200, "Maximum time to wait before dumping collected data per query since the first data appeared", 0) \ - M(Milliseconds, async_insert_stale_timeout, 0, "Maximum time to wait before dumping collected data per query since the last data appeared. Zero means no timeout at all", 0) \ - \ M(Bool, cross_to_inner_join_rewrite, true, "Use inner join instead of comma/cross join if possible", 0) \ \ M(Bool, output_format_arrow_low_cardinality_as_dictionary, false, "Enable output LowCardinality type as Dictionary Arrow type", 0) \ diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index a014f85d45f..c8c2b380850 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -439,6 +439,18 @@ bool FormatFactory::checkIfFormatIsColumnOriented(const String & name) return target.is_column_oriented; } +bool FormatFactory::isInputFormat(const String & name) const +{ + auto it = dict.find(name); + return it != dict.end() && (it->second.input_creator || it->second.input_processor_creator); +} + +bool FormatFactory::isOutputFormat(const String & name) const +{ + auto it = dict.find(name); + return it != dict.end() && (it->second.output_creator || it->second.output_processor_creator); +} + FormatFactory & FormatFactory::instance() { static FormatFactory ret; diff --git a/src/Formats/FormatFactory.h b/src/Formats/FormatFactory.h index e935eb4d761..7ff72387509 100644 --- a/src/Formats/FormatFactory.h +++ b/src/Formats/FormatFactory.h @@ -187,6 +187,9 @@ public: return dict; } + bool isInputFormat(const String & name) const; + bool isOutputFormat(const String & name) const; + private: FormatsDictionary dict; diff --git a/src/Interpreters/AsynchronousInsertQueue.cpp b/src/Interpreters/AsynchronousInsertQueue.cpp index 5b9521f334e..da41eb82d5e 100644 --- a/src/Interpreters/AsynchronousInsertQueue.cpp +++ b/src/Interpreters/AsynchronousInsertQueue.cpp @@ -18,6 +18,7 @@ #include #include #include +#include namespace DB @@ -27,6 +28,7 @@ namespace ErrorCodes { extern const int TIMEOUT_EXCEEDED; extern const int UNKNOWN_EXCEPTION; + extern const int UNKNOWN_FORMAT; } AsynchronousInsertQueue::InsertQuery::InsertQuery(const ASTPtr & query_, const Settings & settings_) @@ -166,6 +168,9 @@ void AsynchronousInsertQueue::push(ASTPtr query, ContextPtr query_context) auto table = interpreter.getTable(insert_query); auto sample_block = interpreter.getSampleBlock(insert_query, table, table->getInMemoryMetadataPtr()); + if (!FormatFactory::instance().isInputFormat(insert_query.format)) + throw Exception(ErrorCodes::UNKNOWN_FORMAT, "Unknown input format {}", insert_query.format); + query_context->checkAccess(AccessType::INSERT, insert_query.table_id, sample_block.getNames()); String bytes; @@ -324,7 +329,7 @@ void AsynchronousInsertQueue::cleanup() } if (total_removed) - LOG_TRACE(log, "Removed stale entries for {} queries from asynchronous insertion queue", 
keys_to_remove.size()); + LOG_TRACE(log, "Removed stale entries for {} queries from asynchronous insertion queue", total_removed); } { diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index be1cb21bbc3..d0e941b0aff 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -2744,8 +2744,8 @@ void Context::setAsynchronousInsertQueue(const std::shared_ptrasync_insert_queue = ptr; } diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index ecf2d87dd5c..0b1746feebc 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -582,7 +582,7 @@ static std::tuple executeQueryImpl( auto * queue = context->getAsynchronousInsertQueue(); const bool async_insert = queue && insert_query && !insert_query->select - && insert_query->hasInlinedData() && settings.async_insert_mode; + && insert_query->hasInlinedData() && settings.async_insert; if (async_insert) { diff --git a/tests/queries/0_stateless/02015_async_inserts_1.sh b/tests/queries/0_stateless/02015_async_inserts_1.sh index 365d2e99b31..b4310f5101c 100755 --- a/tests/queries/0_stateless/02015_async_inserts_1.sh +++ b/tests/queries/0_stateless/02015_async_inserts_1.sh @@ -4,7 +4,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -url="${CLICKHOUSE_URL}&async_insert_mode=1&wait_for_async_insert=1" +url="${CLICKHOUSE_URL}&async_insert=1&wait_for_async_insert=1" ${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS async_inserts" ${CLICKHOUSE_CLIENT} -q "CREATE TABLE async_inserts (id UInt32, s String) ENGINE = Memory" diff --git a/tests/queries/0_stateless/02015_async_inserts_2.sh b/tests/queries/0_stateless/02015_async_inserts_2.sh index 0eb11bb5219..90f5584d84e 100755 --- a/tests/queries/0_stateless/02015_async_inserts_2.sh +++ b/tests/queries/0_stateless/02015_async_inserts_2.sh @@ -4,7 +4,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -url="${CLICKHOUSE_URL}&async_insert_mode=1&wait_for_async_insert=1" +url="${CLICKHOUSE_URL}&async_insert=1&wait_for_async_insert=1" ${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS async_inserts" ${CLICKHOUSE_CLIENT} -q "CREATE TABLE async_inserts (id UInt32, s String) ENGINE = MergeTree ORDER BY id" diff --git a/tests/queries/0_stateless/02015_async_inserts_3.sh b/tests/queries/0_stateless/02015_async_inserts_3.sh index fe97354d3ac..9d85d81caac 100755 --- a/tests/queries/0_stateless/02015_async_inserts_3.sh +++ b/tests/queries/0_stateless/02015_async_inserts_3.sh @@ -4,7 +4,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -url="${CLICKHOUSE_URL}&async_insert_mode=1&wait_for_async_insert=1" +url="${CLICKHOUSE_URL}&async_insert=1&wait_for_async_insert=1" ${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS async_inserts" ${CLICKHOUSE_CLIENT} -q "CREATE TABLE async_inserts (id UInt32, v UInt32 DEFAULT id * id) ENGINE = Memory" diff --git a/tests/queries/0_stateless/02015_async_inserts_4.sh b/tests/queries/0_stateless/02015_async_inserts_4.sh index f8cc0aa0a48..65598923b96 100755 --- a/tests/queries/0_stateless/02015_async_inserts_4.sh +++ b/tests/queries/0_stateless/02015_async_inserts_4.sh @@ -4,7 +4,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh -url="${CLICKHOUSE_URL}&async_insert_mode=1&wait_for_async_insert=1" +url="${CLICKHOUSE_URL}&async_insert=1&wait_for_async_insert=1" ${CLICKHOUSE_CLIENT} -q "DROP USER IF EXISTS u_02015_allowed" ${CLICKHOUSE_CLIENT} -q "DROP USER IF EXISTS u_02015_denied" diff --git a/tests/queries/0_stateless/02015_async_inserts_5.sh b/tests/queries/0_stateless/02015_async_inserts_5.sh index e07e274d1d7..05ea876b101 100755 --- a/tests/queries/0_stateless/02015_async_inserts_5.sh +++ b/tests/queries/0_stateless/02015_async_inserts_5.sh @@ -4,7 +4,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -url="${CLICKHOUSE_URL}&async_insert_mode=1&wait_for_async_insert=1" +url="${CLICKHOUSE_URL}&async_insert=1&wait_for_async_insert=1" ${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS async_inserts" ${CLICKHOUSE_CLIENT} -q "CREATE TABLE async_inserts (id UInt32, s String) ENGINE = MergeTree ORDER BY id SETTINGS parts_to_throw_insert = 1" diff --git a/tests/queries/0_stateless/02015_async_inserts_6.reference b/tests/queries/0_stateless/02015_async_inserts_6.reference new file mode 100644 index 00000000000..f3a80cd0cdf --- /dev/null +++ b/tests/queries/0_stateless/02015_async_inserts_6.reference @@ -0,0 +1,4 @@ +Code: 60 +Code: 73 +Code: 73 +Code: 16 diff --git a/tests/queries/0_stateless/02015_async_inserts_6.sh b/tests/queries/0_stateless/02015_async_inserts_6.sh new file mode 100755 index 00000000000..94091783081 --- /dev/null +++ b/tests/queries/0_stateless/02015_async_inserts_6.sh @@ -0,0 +1,24 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +url="${CLICKHOUSE_URL}&async_insert=1&wait_for_async_insert=0" + +${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS async_inserts" +${CLICKHOUSE_CLIENT} -q "CREATE TABLE async_inserts (id UInt32, s String) ENGINE = Memory" + +${CLICKHOUSE_CURL} -sS $url -d 'INSERT INTO async_inserts1 FORMAT JSONEachRow {"id": 1, "s": "a"}' \ + | grep -o "Code: 60" + +${CLICKHOUSE_CURL} -sS $url -d 'INSERT INTO async_inserts FORMAT BadFormat {"id": 1, "s": "a"}' \ + | grep -o "Code: 73" + +${CLICKHOUSE_CURL} -sS $url -d 'INSERT INTO async_inserts FORMAT Pretty {"id": 1, "s": "a"}' \ + | grep -o "Code: 73" + +${CLICKHOUSE_CURL} -sS $url -d 'INSERT INTO async_inserts (id, a) FORMAT JSONEachRow {"id": 1, "s": "a"}' \ + | grep -o "Code: 16" + +${CLICKHOUSE_CLIENT} -q "DROP TABLE async_inserts" diff --git a/tests/queries/0_stateless/02015_async_inserts_stress_long.sh b/tests/queries/0_stateless/02015_async_inserts_stress_long.sh index c11a1be8cef..f9a58818404 100755 --- a/tests/queries/0_stateless/02015_async_inserts_stress_long.sh +++ b/tests/queries/0_stateless/02015_async_inserts_stress_long.sh @@ -7,7 +7,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) function insert1() { - url="${CLICKHOUSE_URL}&async_insert_mode=1&wait_for_async_insert=0" + url="${CLICKHOUSE_URL}&async_insert=1&wait_for_async_insert=0" while true; do ${CLICKHOUSE_CURL} -sS "$url" -d 'INSERT INTO async_inserts FORMAT CSV 1,"a" @@ -18,7 +18,7 @@ function insert1() function insert2() { - url="${CLICKHOUSE_URL}&async_insert_mode=1&wait_for_async_insert=0" + url="${CLICKHOUSE_URL}&async_insert=1&wait_for_async_insert=0" while true; do ${CLICKHOUSE_CURL} -sS "$url" -d 'INSERT INTO async_inserts FORMAT JSONEachRow {"id": 5, "s": "e"} {"id": 6, "s": "f"}' done From 5a4985fe27426a9a66c541ec9077e24222c27a07 Mon Sep 17 
00:00:00 2001 From: PHO Date: Fri, 17 Sep 2021 11:12:05 +0900 Subject: [PATCH 42/80] Do not initialize std::pair with a std::tuple GCC/libstdc++ does not allow this and emits a compilation error. The standard does not say they are supposed to be interchangeable. --- src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp b/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp index 1fc279bff23..0fd9d0a0856 100644 --- a/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp +++ b/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp @@ -433,7 +433,7 @@ void MaterializedPostgreSQLConsumer::processReplicationMessage(const char * repl if (new_relation_definition) { - current_schema_data.column_identifiers.emplace_back(std::make_tuple(data_type_id, type_modifier)); + current_schema_data.column_identifiers.emplace_back(std::make_pair(data_type_id, type_modifier)); } else { From 3c4b1ea9c5abfffa3e497e3c2e5c16f7e242a6dc Mon Sep 17 00:00:00 2001 From: PHO Date: Fri, 17 Sep 2021 11:18:01 +0900 Subject: [PATCH 43/80] New setting: output_format_csv_null_representation This is the same as output_format_tsv_null_representation but is for CSV output. --- docs/en/operations/settings/settings.md | 37 +++++++++++++++++++ src/Core/Settings.h | 1 + .../Serializations/SerializationNullable.cpp | 2 +- src/Formats/FormatFactory.cpp | 1 + src/Formats/FormatSettings.h | 1 + ...9_output_csv_null_representation.reference | 4 ++ .../02029_output_csv_null_representation.sql | 16 ++++++++ 7 files changed, 61 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/02029_output_csv_null_representation.reference create mode 100644 tests/queries/0_stateless/02029_output_csv_null_representation.sql diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index e3a46d46cd7..330d38db705 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -2833,6 +2833,43 @@ Possible values: Default value: `1`. +## output_format_csv_null_representation {#output_format_csv_null_representation} + +Defines the representation of `NULL` for [CSV](../../interfaces/formats.md#csv) output format. User can set any string as a value, for example, `My NULL`. + +Default value: `\N`. + +**Examples** + +Query + +```sql +SELECT * from csv_custom_null FORMAT CSV; +``` + +Result + +```text +788 +\N +\N +``` + +Query + +```sql +SET output_format_csv_null_representation = 'My NULL'; +SELECT * FROM csv_custom_null FORMAT CSV; +``` + +Result + +```text +788 +My NULL +My NULL +``` + ## output_format_tsv_null_representation {#output_format_tsv_null_representation} Defines the representation of `NULL` for [TSV](../../interfaces/formats.md#tabseparated) output format. User can set any string as a value, for example, `My NULL`. diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 0e29168f906..58c100e7b64 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -574,6 +574,7 @@ class IColumn; M(String, output_format_avro_codec, "", "Compression codec used for output. 
Possible values: 'null', 'deflate', 'snappy'.", 0) \ M(UInt64, output_format_avro_sync_interval, 16 * 1024, "Sync interval in bytes.", 0) \ M(Bool, output_format_tsv_crlf_end_of_line, false, "If it is set true, end of line in TSV format will be \\r\\n instead of \\n.", 0) \ + M(String, output_format_csv_null_representation, "\\N", "Custom NULL representation in CSV format", 0) \ M(String, output_format_tsv_null_representation, "\\N", "Custom NULL representation in TSV format", 0) \ M(Bool, output_format_decimal_trailing_zeros, false, "Output trailing zeros when printing Decimal values. E.g. 1.230000 instead of 1.23.", 0) \ \ diff --git a/src/DataTypes/Serializations/SerializationNullable.cpp b/src/DataTypes/Serializations/SerializationNullable.cpp index 4de2b08c043..b607d5871d6 100644 --- a/src/DataTypes/Serializations/SerializationNullable.cpp +++ b/src/DataTypes/Serializations/SerializationNullable.cpp @@ -327,7 +327,7 @@ void SerializationNullable::serializeTextCSV(const IColumn & column, size_t row_ const ColumnNullable & col = assert_cast(column); if (col.isNullAt(row_num)) - writeCString("\\N", ostr); + writeString(settings.csv.null_representation, ostr); else nested->serializeTextCSV(col.getNestedColumn(), row_num, ostr, settings); } diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index a014f85d45f..24132e9c585 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -60,6 +60,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings) format_settings.csv.delimiter = settings.format_csv_delimiter; format_settings.csv.empty_as_default = settings.input_format_defaults_for_omitted_fields; format_settings.csv.input_format_enum_as_number = settings.input_format_csv_enum_as_number; + format_settings.csv.null_representation = settings.output_format_csv_null_representation; format_settings.csv.unquoted_null_literal_as_null = settings.input_format_csv_unquoted_null_literal_as_null; format_settings.csv.input_format_arrays_as_nested_csv = settings.input_format_csv_arrays_as_nested_csv; format_settings.custom.escaping_rule = settings.format_custom_escaping_rule; diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index 3e1e00584c0..3a274f99a5c 100644 --- a/src/Formats/FormatSettings.h +++ b/src/Formats/FormatSettings.h @@ -76,6 +76,7 @@ struct FormatSettings bool crlf_end_of_line = false; bool input_format_enum_as_number = false; bool input_format_arrays_as_nested_csv = false; + String null_representation = "\\N"; } csv; struct Custom diff --git a/tests/queries/0_stateless/02029_output_csv_null_representation.reference b/tests/queries/0_stateless/02029_output_csv_null_representation.reference new file mode 100644 index 00000000000..a5174f4424f --- /dev/null +++ b/tests/queries/0_stateless/02029_output_csv_null_representation.reference @@ -0,0 +1,4 @@ +# output_format_csv_null_representation should initially be \\N +"val1",\N,"val3" +# Changing output_format_csv_null_representation +"val1",∅,"val3" diff --git a/tests/queries/0_stateless/02029_output_csv_null_representation.sql b/tests/queries/0_stateless/02029_output_csv_null_representation.sql new file mode 100644 index 00000000000..772c6c89144 --- /dev/null +++ b/tests/queries/0_stateless/02029_output_csv_null_representation.sql @@ -0,0 +1,16 @@ +DROP TABLE IF EXISTS test_data; +CREATE TABLE test_data ( + col1 Nullable(String), + col2 Nullable(String), + col3 Nullable(String) +) ENGINE = Memory; + +INSERT INTO test_data VALUES ('val1', NULL, 
'val3'); + +SELECT '# output_format_csv_null_representation should initially be \\N'; +SELECT * FROM test_data FORMAT CSV; + +SELECT '# Changing output_format_csv_null_representation'; +SET output_format_csv_null_representation = '∅'; +SELECT * FROM test_data FORMAT CSV; +SET output_format_csv_null_representation = '\\N'; From b108c5e1b0f7383d572e0a8d10fde809c99dca47 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Tue, 7 Sep 2021 22:32:48 +0300 Subject: [PATCH 44/80] Implement max_suspicious_broken_parts_bytes Usually broken parts are found from just created parts, which are usually small enough, so it make sense to have a limit in bytes for broken parts. Default value was set to 1G. --- src/Storages/MergeTree/MergeTreeData.cpp | 34 +++++++++++++++------- src/Storages/MergeTree/MergeTreeSettings.h | 1 + 2 files changed, 24 insertions(+), 11 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index ee1387af49b..00e7cb09137 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -978,6 +978,7 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks) DataPartsVector broken_parts_to_detach; size_t suspicious_broken_parts = 0; + size_t suspicious_broken_parts_bytes = 0; std::atomic has_adaptive_parts = false; std::atomic has_non_adaptive_parts = false; @@ -1004,17 +1005,18 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks) if (part_disk_ptr->exists(marker_path)) { + /// NOTE: getBytesOnDisk() cannot be used here, since it maybe zero of checksums.txt will not exist + size_t size_of_part = IMergeTreeDataPart::calculateTotalSizeOnDisk(part->volume->getDisk(), part->getFullRelativePath()); LOG_WARNING(log, - "Detaching stale part {}{}, which should have been deleted after a move. That can only happen " - "after unclean restart of ClickHouse after move of a part having an operation blocking that " - "stale copy of part.", - getFullPathOnDisk(part_disk_ptr), part_name); - + "Detaching stale part {}{} (size: {}), which should have been deleted after a move. " + "That can only happen after unclean restart of ClickHouse after move of a part having an operation blocking that stale copy of part.", + getFullPathOnDisk(part_disk_ptr), part_name, formatReadableSizeWithBinarySuffix(size_of_part)); std::lock_guard loading_lock(mutex); broken_parts_to_detach.push_back(part); ++suspicious_broken_parts; + suspicious_broken_parts_bytes += size_of_part; return; } @@ -1043,16 +1045,20 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks) /// Ignore broken parts that can appear as a result of hard server restart. if (broken) { - LOG_ERROR(log, - "Detaching broken part {}{}. If it happened after update, it is likely because of backward " - "incompatibility. You need to resolve this manually", - getFullPathOnDisk(part_disk_ptr), part_name); + /// NOTE: getBytesOnDisk() cannot be used here, since it maybe zero of checksums.txt will not exist + size_t size_of_part = IMergeTreeDataPart::calculateTotalSizeOnDisk(part->volume->getDisk(), part->getFullRelativePath()); + LOG_ERROR(log, + "Detaching broken part {}{} (size: {}). " + "If it happened after update, it is likely because of backward incompability. 
" + "You need to resolve this manually", + getFullPathOnDisk(part_disk_ptr), part_name, formatReadableSizeWithBinarySuffix(size_of_part)); std::lock_guard loading_lock(mutex); broken_parts_to_detach.push_back(part); ++suspicious_broken_parts; + suspicious_broken_parts_bytes += size_of_part; return; } @@ -1099,8 +1105,14 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks) has_non_adaptive_index_granularity_parts = has_non_adaptive_parts; if (suspicious_broken_parts > settings->max_suspicious_broken_parts && !skip_sanity_checks) - throw Exception("Suspiciously many (" + toString(suspicious_broken_parts) + ") broken parts to remove.", - ErrorCodes::TOO_MANY_UNEXPECTED_DATA_PARTS); + throw Exception(ErrorCodes::TOO_MANY_UNEXPECTED_DATA_PARTS, + "Suspiciously many ({}) broken parts to remove.", + suspicious_broken_parts); + + if (suspicious_broken_parts_bytes > settings->max_suspicious_broken_parts_bytes && !skip_sanity_checks) + throw Exception(ErrorCodes::TOO_MANY_UNEXPECTED_DATA_PARTS, + "Suspiciously big size ({}) of all broken parts to remove.", + formatReadableSizeWithBinarySuffix(suspicious_broken_parts_bytes)); for (auto & part : broken_parts_to_detach) part->renameToDetached("broken-on-start"); /// detached parts must not have '_' in prefixes diff --git a/src/Storages/MergeTree/MergeTreeSettings.h b/src/Storages/MergeTree/MergeTreeSettings.h index 889b89b9a27..92a892c963f 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.h +++ b/src/Storages/MergeTree/MergeTreeSettings.h @@ -79,6 +79,7 @@ struct Settings; M(Seconds, try_fetch_recompressed_part_timeout, 7200, "Recompression works slow in most cases, so we don't start merge with recompression until this timeout and trying to fetch recompressed part from replica which assigned this merge with recompression.", 0) \ M(Bool, always_fetch_merged_part, false, "If true, replica never merge parts and always download merged parts from other replicas.", 0) \ M(UInt64, max_suspicious_broken_parts, 10, "Max broken parts, if more - deny automatic deletion.", 0) \ + M(UInt64, max_suspicious_broken_parts_bytes, 1ULL * 1024 * 1024 * 1024, "Max size of all broken parts, if more - deny automatic deletion.", 0) \ M(UInt64, max_files_to_modify_in_alter_columns, 75, "Not apply ALTER if number of files for modification(deletion, addition) more than this.", 0) \ M(UInt64, max_files_to_remove_in_alter_columns, 50, "Not apply ALTER, if number of files for deletion more than this.", 0) \ M(Float, replicated_max_ratio_of_wrong_parts, 0.5, "If ratio of wrong parts to total number of parts is less than this - allow to start.", 0) \ From a613382d48d25a4e0da4c5a69fa5b1c7e15e6f31 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Wed, 15 Sep 2021 21:18:06 +0300 Subject: [PATCH 45/80] Cover max_suspicious_broken_parts/max_suspicious_broken_parts_bytes --- .../__init__.py | 0 .../test_max_suspicious_broken_parts/test.py | 121 ++++++++++++++++++ 2 files changed, 121 insertions(+) create mode 100644 tests/integration/test_max_suspicious_broken_parts/__init__.py create mode 100644 tests/integration/test_max_suspicious_broken_parts/test.py diff --git a/tests/integration/test_max_suspicious_broken_parts/__init__.py b/tests/integration/test_max_suspicious_broken_parts/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_max_suspicious_broken_parts/test.py b/tests/integration/test_max_suspicious_broken_parts/test.py new file mode 100644 index 00000000000..31f53fdbc3c --- /dev/null +++ 
b/tests/integration/test_max_suspicious_broken_parts/test.py @@ -0,0 +1,121 @@ +# pylint: disable=unused-argument +# pylint: disable=redefined-outer-name +# pylint: disable=line-too-long + +import pytest + +from helpers.client import QueryRuntimeException +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) +node = cluster.add_instance('node', stay_alive=True) + +@pytest.fixture(scope='module', autouse=True) +def start_cluster(): + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + +def break_part(table, part_name): + node.exec_in_container(['bash', '-c', f'rm /var/lib/clickhouse/data/default/{table}/{part_name}/columns.txt']) + +def remove_part(table, part_name): + node.exec_in_container(['bash', '-c', f'rm -r /var/lib/clickhouse/data/default/{table}/{part_name}']) + +def get_count(table): + return int(node.query(f'SELECT count() FROM {table}').strip()) + +def detach_table(table): + node.query(f'DETACH TABLE {table}') +def attach_table(table): + node.query(f'ATTACH TABLE {table}') + +def check_table(table): + rows = 900 + per_part_rows = 90 + + node.query(f'INSERT INTO {table} SELECT * FROM numbers(900)') + + assert get_count(table) == rows + + # break one part, and check that clickhouse will be alive + break_part(table, '0_1_1_0') + rows -= per_part_rows + detach_table(table) + attach_table(table) + assert get_count(table) == rows + + # break two parts, and check that clickhouse will not start + break_part(table, '1_2_2_0') + break_part(table, '2_3_3_0') + rows -= per_part_rows*2 + detach_table(table) + with pytest.raises(QueryRuntimeException): + attach_table(table) + + # now remove one part, and check + remove_part(table, '1_2_2_0') + attach_table(table) + assert get_count(table) == rows + + node.query(f'DROP TABLE {table}') + +def test_max_suspicious_broken_parts(): + node.query(""" + CREATE TABLE test_max_suspicious_broken_parts ( + key Int + ) + ENGINE=MergeTree + ORDER BY key + PARTITION BY key%10 + SETTINGS + max_suspicious_broken_parts = 1; + """) + check_table('test_max_suspicious_broken_parts') + +def test_max_suspicious_broken_parts_bytes(): + node.query(""" + CREATE TABLE test_max_suspicious_broken_parts_bytes ( + key Int + ) + ENGINE=MergeTree + ORDER BY key + PARTITION BY key%10 + SETTINGS + max_suspicious_broken_parts = 10, + /* one part takes ~751 byte, so we allow failure of one part with these limit */ + max_suspicious_broken_parts_bytes = 1000; + """) + check_table('test_max_suspicious_broken_parts_bytes') + +def test_max_suspicious_broken_parts__wide(): + node.query(""" + CREATE TABLE test_max_suspicious_broken_parts__wide ( + key Int + ) + ENGINE=MergeTree + ORDER BY key + PARTITION BY key%10 + SETTINGS + min_bytes_for_wide_part = 0, + max_suspicious_broken_parts = 1; + """) + check_table('test_max_suspicious_broken_parts__wide') + +def test_max_suspicious_broken_parts_bytes__wide(): + node.query(""" + CREATE TABLE test_max_suspicious_broken_parts_bytes__wide ( + key Int + ) + ENGINE=MergeTree + ORDER BY key + PARTITION BY key%10 + SETTINGS + min_bytes_for_wide_part = 0, + max_suspicious_broken_parts = 10, + /* one part takes ~750 byte, so we allow failure of one part with these limit */ + max_suspicious_broken_parts_bytes = 1000; + """) + check_table('test_max_suspicious_broken_parts_bytes__wide') From 749b91347dfbd21248b046df7179aa2403943552 Mon Sep 17 00:00:00 2001 From: tavplubix Date: Fri, 17 Sep 2021 22:39:54 +0300 Subject: [PATCH 46/80] Update PostgreSQLDictionarySource.cpp --- 
src/Dictionaries/PostgreSQLDictionarySource.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Dictionaries/PostgreSQLDictionarySource.cpp b/src/Dictionaries/PostgreSQLDictionarySource.cpp index 50be5592918..ae153eaed53 100644 --- a/src/Dictionaries/PostgreSQLDictionarySource.cpp +++ b/src/Dictionaries/PostgreSQLDictionarySource.cpp @@ -32,7 +32,7 @@ namespace { QualifiedTableName qualified_name{schema, table}; - if (qualified_name.database.empty()) + if (qualified_name.database.empty() && !qualified_name.table.empty()) qualified_name = QualifiedTableName::parseFromString(qualified_name.table); /// Do not need db because it is already in a connection string. From 091ce151bfa9a2208243997432f9388277aca76e Mon Sep 17 00:00:00 2001 From: vdimir Date: Fri, 17 Sep 2021 14:30:45 +0300 Subject: [PATCH 47/80] Avoid cycles in optimizeFuseQuantileFunctions --- src/Interpreters/GatherFunctionQuantileVisitor.cpp | 8 ++++++++ src/Interpreters/GatherFunctionQuantileVisitor.h | 3 ++- .../0_stateless/01956_fuse_quantile_optimization.sql | 2 ++ 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/src/Interpreters/GatherFunctionQuantileVisitor.cpp b/src/Interpreters/GatherFunctionQuantileVisitor.cpp index 74a2312b769..fab03c232df 100644 --- a/src/Interpreters/GatherFunctionQuantileVisitor.cpp +++ b/src/Interpreters/GatherFunctionQuantileVisitor.cpp @@ -71,5 +71,13 @@ void GatherFunctionQuantileData::FuseQuantileAggregatesData::addFuncNode(ASTPtr arg_map_function[arg_name].push_back(&ast); } +bool GatherFunctionQuantileData::needChild(const ASTPtr & node, const ASTPtr &) +{ + /// Skip children of quantile* functions to escape cycles in further processing + if (const auto * func = node ? node->as() : nullptr) + return !quantile_fuse_name_mapping.contains(func->name); + return true; +} + } diff --git a/src/Interpreters/GatherFunctionQuantileVisitor.h b/src/Interpreters/GatherFunctionQuantileVisitor.h index 188dad4731f..19f092720af 100644 --- a/src/Interpreters/GatherFunctionQuantileVisitor.h +++ b/src/Interpreters/GatherFunctionQuantileVisitor.h @@ -27,8 +27,9 @@ public: static String getFusedName(const String & func_name); + static bool needChild(const ASTPtr & node, const ASTPtr &); }; -using GatherFunctionQuantileVisitor = InDepthNodeVisitor, true>; +using GatherFunctionQuantileVisitor = InDepthNodeVisitor, true>; } diff --git a/tests/queries/0_stateless/01956_fuse_quantile_optimization.sql b/tests/queries/0_stateless/01956_fuse_quantile_optimization.sql index df01e99110d..886867cb3cc 100644 --- a/tests/queries/0_stateless/01956_fuse_quantile_optimization.sql +++ b/tests/queries/0_stateless/01956_fuse_quantile_optimization.sql @@ -62,5 +62,7 @@ SELECT quantileTiming(0.5)(NULL, NULL, quantileTiming(-inf)(NULL), NULL) FROM da SELECT quantileTDigest(NULL)(NULL, quantileTDigest(3.14)(NULL, d + NULL), 2.), NULL FORMAT Null; -- { serverError ILLEGAL_AGGREGATION } SELECT quantile(1, 0.3)(d), quantile(0.3)(d) FROM datetime; -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } SELECT quantile(quantileDeterministic('', '2.47')('0.02', '0.2', NULL), 0.9)(d), quantile(0.3)(d) FROM datetime; -- { serverError ILLEGAL_AGGREGATION } +SELECT quantileTimingWeighted([[[[['-214748364.8'], NULL]], [[[quantileTimingWeighted([[[[['-214748364.8'], NULL], '-922337203.6854775808'], [[['-214748364.7']]], NULL]])([NULL], NULL), '-214748364.7']]], NULL]])([NULL], NULL); -- { serverError ILLEGAL_AGGREGATION } +SELECT quantileTimingWeighted([quantileTimingWeighted(0.5)(1, 1)])(1, 1); -- { serverError 
ILLEGAL_AGGREGATION } DROP TABLE datetime; From 5dcdd9c44d3bcec2df5a29fa056ab4c9f4dec9c9 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Mon, 20 Sep 2021 13:40:21 +0300 Subject: [PATCH 48/80] Update single_page.py --- docs/tools/single_page.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/docs/tools/single_page.py b/docs/tools/single_page.py index 5c7ea00105c..0e82a1acb87 100644 --- a/docs/tools/single_page.py +++ b/docs/tools/single_page.py @@ -219,7 +219,10 @@ def build_single_page_version(lang, args, nav, cfg): ] logging.info(' '.join(create_pdf_command)) - subprocess.check_call(' '.join(create_pdf_command), shell=True) + try: + subprocess.check_call(' '.join(create_pdf_command), shell=True) + except: + pass # TODO: fix pdf issues logging.info(f'Finished building single page version for {lang}') From d063266a6e25b9e7b0329ac8f5882ed42dfa8f28 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Mon, 20 Sep 2021 15:40:04 +0300 Subject: [PATCH 49/80] Update website.py --- docs/tools/website.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/docs/tools/website.py b/docs/tools/website.py index 2e0d0974a5d..c8e763d90a4 100644 --- a/docs/tools/website.py +++ b/docs/tools/website.py @@ -217,8 +217,9 @@ def minify_file(path, css_digest, js_digest): content = content.replace('base.js?js_digest', f'base.js?{js_digest}') elif path.endswith('.css'): content = cssmin.cssmin(content) - elif path.endswith('.js'): - content = jsmin.jsmin(content) +# TODO: restore jsmin +# elif path.endswith('.js'): +# content = jsmin.jsmin(content) with open(path, 'wb') as f: f.write(content.encode('utf-8')) From 36f56b6807ffdb5a525da251fc569a970a60d32a Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 20 Sep 2021 15:46:49 +0300 Subject: [PATCH 50/80] Add GitHub links for the team --- website/templates/company/founders.html | 16 ++-- website/templates/company/team.html | 119 ++++++++++++------------ 2 files changed, 67 insertions(+), 68 deletions(-) diff --git a/website/templates/company/founders.html b/website/templates/company/founders.html index dbff295af1e..95ed738f12d 100644 --- a/website/templates/company/founders.html +++ b/website/templates/company/founders.html @@ -1,6 +1,6 @@
- +

{{ _('Meet the Team') }}

@@ -11,7 +11,7 @@
- + @@ -21,10 +21,10 @@

{{ _('Co-Founder & President, Product and Engineering') }}

- +
- + @@ -34,11 +34,11 @@

{{ _('Co-Founder & CEO') }}

- +
- - + +

@@ -47,7 +47,7 @@

{{ _('Co-Founder & CTO') }}

- +

diff --git a/website/templates/company/team.html b/website/templates/company/team.html index 28e7b622302..766d136af8d 100644 --- a/website/templates/company/team.html +++ b/website/templates/company/team.html @@ -1,27 +1,27 @@
- +

{{ _('ClickHouse Team') }}

- -
+ + -
+

{{ _('Vitaly Baranov') }}

{{ _('Principal Software Engineer') }}

- +
- + @@ -31,10 +31,10 @@

{{ _('VP, Product') }}

- +
- + @@ -44,11 +44,11 @@

{{ _('Adviser, Security, Privacy & Compliance') }}

- +
- - + +

@@ -57,10 +57,10 @@

{{ _('Software Engineer') }}

- +

- + @@ -70,10 +70,10 @@

{{ _('Senior Director, Business Technology') }}

- +
- + @@ -83,11 +83,11 @@

{{ _('VP, Sales') }}

- +
{% if false %}
- +
@@ -97,12 +97,12 @@

{{ _('Account Executive') }}

- +
{% endif %}
- - + +

@@ -111,24 +111,24 @@

{{ _('Senior Software Engineer') }}

- +

- -
+ + -
+

{{ _('Nikolai Kochetov') }}

{{ _('Engineering Team Lead') }}

- +
{% if false %}
- +
@@ -138,12 +138,12 @@

{{ _('Senior Recruiter') }}

- +
{% endif %}
- - + +

@@ -152,11 +152,11 @@

{{ _('Software Engineer') }}

- +

{% if false %}
- +
@@ -164,14 +164,14 @@ {{ _('Claire Lucas') }}

- {{ _('Director, Global Business Strategy & Operations') }} + {{ _('Director, Global Business Strategy & Operations') }}

- +
{% endif %}
- - + +

@@ -180,11 +180,11 @@

{{ _('Software Engineer') }}

- +

- - + +

@@ -193,10 +193,10 @@

{{ _('Senior Software Engineer') }}

- +

- + @@ -206,11 +206,11 @@

{{ _('VP, Support & Services') }}

- +
- - + +

@@ -219,10 +219,10 @@

{{ _('Senior Software Engineer') }}

- +

- + @@ -232,12 +232,11 @@

{{ _('Director, Global Learning') }}

- +
- - +

@@ -246,11 +245,11 @@

{{ _('Engineering Team Lead') }}

- +

- - + +

@@ -259,10 +258,10 @@

{{ _('Software Engineer') }}

- +

- + @@ -272,11 +271,11 @@

{{ _('VP, Operations') }}

- +
- - + +

@@ -285,10 +284,10 @@

{{ _('Software Engineer') }}

- +

- + @@ -298,7 +297,7 @@

{{ _('VP, EMEA') }}

- +
@@ -311,7 +310,7 @@

{{ _('Senior Technical Project Manager') }}

- +
From d23ccf016bc55fb75c135f2a96f50a8e877a78d0 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 20 Sep 2021 15:50:05 +0300 Subject: [PATCH 51/80] Fix HTML --- website/templates/company/team.html | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/website/templates/company/team.html b/website/templates/company/team.html index 766d136af8d..8b4c4e26774 100644 --- a/website/templates/company/team.html +++ b/website/templates/company/team.html @@ -42,7 +42,7 @@ {{ _('Jason Chan') }}

- {{ _('Adviser, Security, Privacy & Compliance') }}
+ {{ _('Adviser, Security, Privacy & Compliance') }}

@@ -204,7 +204,7 @@ {{ _('Thom O’Connor') }}

- {{ _('VP, Support & Services') }}
+ {{ _('VP, Support & Services') }}

From 14c394daffb36e49b943cb3e62524d7c073929c8 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Mon, 20 Sep 2021 16:00:58 +0300 Subject: [PATCH 52/80] Update success.html --- website/templates/index/success.html | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/website/templates/index/success.html b/website/templates/index/success.html index 3249eabc1ee..8ab32b06ac9 100644 --- a/website/templates/index/success.html +++ b/website/templates/index/success.html @@ -64,7 +64,7 @@

{{ _('Uber moved its logging platform to ClickHouse, increasing developer productivity and overall reliability of the platform while seeing 3x data compression, a 10x performance increase, and a ½ reduction in hardware cost.') }}

- {{ _('Read the Case Study') }}
+ {{ _('Read the Case Study') }}
@@ -103,7 +103,7 @@

{{ _('eBay adopted ClickHouse for their real time OLAP events (Logs + Metrics) infrastructure. The simplified architecture with ClickHouse allowed them to reduce their DevOps activity and troubleshooting, reduced the overall infrastructure by 90%%, and they saw a stronger integration with Grafana and ClickHouse for visualization and alerting.') }}

- {{ _('Read the Case Study') }}
+ {{ _('Read the Case Study') }}
@@ -142,7 +142,7 @@

{{ _('Cloudflare was having challenges scaling their CitusDB-based system which had a high TCO and maintenance costs due to the complex architecture. By moving their HTTP analytics data to ClickHouse they were able to scale to 8M requests per second, deleted 10’s of thousands of lines of code, reduced their MTTR, and saw a 7x improvement on customer queries per second they could serve.') }}

- {{ _('Read the Case Study') }}
+ {{ _('Read the Case Study') }}
@@ -181,7 +181,7 @@

{{ _('Spotify\'s A/B Experimentation platform is serving thousands of sub-second queries per second on petabyte-scale datasets with ClickHouse. They reduced the amount of low-variance work by an order of magnitude and enabled feature teams to self-serve insights by introducing a unified SQL interface for Data Platform and tools for automatic decision making for Experimentation.') }}

- {{ _('Read the Case Study') }}
+ {{ _('Read the Case Study') }}
@@ -220,7 +220,7 @@

{{ _('ClickHouse helps serve the Client Analytics platform for reporting, deep data analysis as well as advanced data science to provide Deutsche Bank’s front office a clear view on their client\'s activity and profitability.') }}

- {{ _('Read the Case Study') }}
+ {{ _('Read the Case Study') }}
From 99a865349796146ecf52ccc4bca384685490ba75 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Mon, 20 Sep 2021 16:30:08 +0300 Subject: [PATCH 53/80] Update website.py --- docs/tools/website.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/docs/tools/website.py b/docs/tools/website.py index c8e763d90a4..784df35b0b7 100644 --- a/docs/tools/website.py +++ b/docs/tools/website.py @@ -215,8 +215,9 @@ def minify_file(path, css_digest, js_digest): content = minify_html(content) content = content.replace('base.css?css_digest', f'base.css?{css_digest}') content = content.replace('base.js?js_digest', f'base.js?{js_digest}') - elif path.endswith('.css'): - content = cssmin.cssmin(content) +# TODO: restore cssmin +# elif path.endswith('.css'): +# content = cssmin.cssmin(content) # TODO: restore jsmin # elif path.endswith('.js'): # content = jsmin.jsmin(content) From dea0d46dfdcf12f6e68ad887d24805195472ce84 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Mon, 20 Sep 2021 16:52:47 +0300 Subject: [PATCH 54/80] Update base.js --- website/js/base.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/website/js/base.js b/website/js/base.js index 1debd0f780c..52b801eb98f 100644 --- a/website/js/base.js +++ b/website/js/base.js @@ -1,7 +1,7 @@ (function () { Sentry.init({ dsn: 'https://2b95b52c943f4ad99baccab7a9048e4d@o388870.ingest.sentry.io/5246103', - environment: window.location.hostname === 'clickhouse.tech' ? 'prod' : 'test' + environment: window.location.hostname === 'clickhouse.com' ? 'prod' : 'test' }); $(document).click(function (event) { var target = $(event.target); @@ -95,7 +95,7 @@ s.type = "text/javascript"; s.async = true; s.src = "/js/metrika.js"; - if (window.location.hostname.endsWith('clickhouse.tech')) { + if (window.location.hostname.endsWith('clickhouse.com')) { if (w.opera == "[object Opera]") { d.addEventListener("DOMContentLoaded", f, false); } else { From 8adfb9b593bddb03a033391192c98631cd48f646 Mon Sep 17 00:00:00 2001 From: tavplubix Date: Mon, 20 Sep 2021 17:23:10 +0300 Subject: [PATCH 55/80] Stop ThreadFuzzer before hung check (#29167) * stop ThreadFuzzer before hung check * fix * fix --- docker/test/stress/stress | 3 +++ src/Common/ThreadFuzzer.cpp | 21 +++++++++++++++++++++ src/Common/ThreadFuzzer.h | 8 +++++++- src/Interpreters/InterpreterSystemQuery.cpp | 9 +++++++++ src/Parsers/ASTSystemQuery.h | 2 ++ 5 files changed, 42 insertions(+), 1 deletion(-) diff --git a/docker/test/stress/stress b/docker/test/stress/stress index 73a84ad4c40..8fc4ade2da6 100755 --- a/docker/test/stress/stress +++ b/docker/test/stress/stress @@ -70,6 +70,9 @@ def compress_stress_logs(output_path, files_prefix): def prepare_for_hung_check(drop_databases): # FIXME this function should not exist, but... + # ThreadFuzzer significantly slows down server and causes false-positive hung check failures + call("clickhouse client -q 'SYSTEM STOP THREAD FUZZER'", shell=True, stderr=STDOUT) + # We attach gdb to clickhouse-server before running tests # to print stacktraces of all crashes even if clickhouse cannot print it for some reason. # However, it obstruct checking for hung queries. 
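The stress-harness hunk above and the server-side diffs below combine into a simple runtime switch. A minimal sketch of how a test driver would use it (assuming a default client connection; only the two SYSTEM statements themselves are taken from this patch, the surrounding steps are illustrative):

    # Pause thread fuzzing so the hung check observes realistic query timings.
    clickhouse client -q 'SYSTEM STOP THREAD FUZZER'
    # ... run the hung-check queries here ...
    # Hypothetical follow-up, not part of the stress script in this patch:
    # resume fuzzing for subsequent stress iterations.
    clickhouse client -q 'SYSTEM START THREAD FUZZER'
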
diff --git a/src/Common/ThreadFuzzer.cpp b/src/Common/ThreadFuzzer.cpp index 9963a5308c3..896d8ee4e62 100644 --- a/src/Common/ThreadFuzzer.cpp +++ b/src/Common/ThreadFuzzer.cpp @@ -128,6 +128,9 @@ void ThreadFuzzer::initConfiguration() bool ThreadFuzzer::isEffective() const { + if (!isStarted()) + return false; + #if THREAD_FUZZER_WRAP_PTHREAD # define CHECK_WRAPPER_PARAMS(RET, NAME, ...) \ if (NAME##_before_yield_probability.load(std::memory_order_relaxed)) \ @@ -159,6 +162,20 @@ bool ThreadFuzzer::isEffective() const || (sleep_probability > 0 && sleep_time_us > 0)); } +void ThreadFuzzer::stop() +{ + started.store(false, std::memory_order_relaxed); +} + +void ThreadFuzzer::start() +{ + started.store(true, std::memory_order_relaxed); +} + +bool ThreadFuzzer::isStarted() +{ + return started.load(std::memory_order_relaxed); +} static void injection( double yield_probability, @@ -166,6 +183,10 @@ static void injection( double sleep_probability, double sleep_time_us [[maybe_unused]]) { + DENY_ALLOCATIONS_IN_SCOPE; + if (!ThreadFuzzer::isStarted()) + return; + if (yield_probability > 0 && std::bernoulli_distribution(yield_probability)(thread_local_rng)) { diff --git a/src/Common/ThreadFuzzer.h b/src/Common/ThreadFuzzer.h index 1a9e98ca674..743b8c75dc0 100644 --- a/src/Common/ThreadFuzzer.h +++ b/src/Common/ThreadFuzzer.h @@ -1,6 +1,6 @@ #pragma once #include - +#include namespace DB { @@ -54,6 +54,10 @@ public: bool isEffective() const; + static void stop(); + static void start(); + static bool isStarted(); + private: uint64_t cpu_time_period_us = 0; double yield_probability = 0; @@ -61,6 +65,8 @@ private: double sleep_probability = 0; double sleep_time_us = 0; + inline static std::atomic started{true}; + ThreadFuzzer(); void initConfiguration(); diff --git a/src/Interpreters/InterpreterSystemQuery.cpp b/src/Interpreters/InterpreterSystemQuery.cpp index 226ff124cfb..18d31e2f89c 100644 --- a/src/Interpreters/InterpreterSystemQuery.cpp +++ b/src/Interpreters/InterpreterSystemQuery.cpp @@ -40,6 +40,7 @@ #include #include #include +#include #include #include @@ -445,6 +446,12 @@ BlockIO InterpreterSystemQuery::execute() case Type::STOP_LISTEN_QUERIES: case Type::START_LISTEN_QUERIES: throw Exception(ErrorCodes::NOT_IMPLEMENTED, "{} is not supported yet", query.type); + case Type::STOP_THREAD_FUZZER: + ThreadFuzzer::stop(); + break; + case Type::START_THREAD_FUZZER: + ThreadFuzzer::start(); + break; default: throw Exception("Unknown type of SYSTEM query", ErrorCodes::BAD_ARGUMENTS); } @@ -877,6 +884,8 @@ AccessRightsElements InterpreterSystemQuery::getRequiredAccessForDDLOnCluster() } case Type::STOP_LISTEN_QUERIES: break; case Type::START_LISTEN_QUERIES: break; + case Type::STOP_THREAD_FUZZER: break; + case Type::START_THREAD_FUZZER: break; case Type::UNKNOWN: break; case Type::END: break; } diff --git a/src/Parsers/ASTSystemQuery.h b/src/Parsers/ASTSystemQuery.h index f55ccc59160..cf020ecee13 100644 --- a/src/Parsers/ASTSystemQuery.h +++ b/src/Parsers/ASTSystemQuery.h @@ -61,6 +61,8 @@ public: FLUSH_DISTRIBUTED, STOP_DISTRIBUTED_SENDS, START_DISTRIBUTED_SENDS, + START_THREAD_FUZZER, + STOP_THREAD_FUZZER, END }; From dcd6342fb4953bb324ee823586f180970ec84e67 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Mon, 20 Sep 2021 17:24:10 +0300 Subject: [PATCH 56/80] Update overview.html --- website/templates/company/overview.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/website/templates/company/overview.html b/website/templates/company/overview.html index 
98aed9ffd86..e5759227c86 100644 --- a/website/templates/company/overview.html +++ b/website/templates/company/overview.html @@ -2,7 +2,7 @@

- Creators of the online analytical processing (OLAP) database management system ClickHouse have announced their decision to officially incorporate as a company. The creator of ClickHouse, Alexey Milovidov (CTO), will be joined by co-founders and seasoned enterprise software executives, Yury Izrailevsky (President, Product and Engineering) and Aaron Katz (CEO), along with nearly $50M in Series A funding by Index Ventures and Benchmark.
+ Creators of the online analytical processing (OLAP) database management system ClickHouse have announced their decision to officially incorporate as a company. The creator of ClickHouse, Alexey Milovidov (CTO), will be joined by co-founders and seasoned enterprise software executives, Yury Izrailevsky (President, Product and Engineering) and Aaron Katz (CEO), along with nearly $50M in Series A funding led by Index Ventures and Benchmark.

From d3f4d935e6613cac4aee73da4d2350726de041b5 Mon Sep 17 00:00:00 2001 From: Tigran Khudaverdyan <91077115+tigrankhudaverdyan@users.noreply.github.com> Date: Mon, 20 Sep 2021 18:04:49 +0300 Subject: [PATCH 57/80] Change copyright --- LICENSE | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/LICENSE b/LICENSE index 9167b80e269..c46bc7d19e1 100644 --- a/LICENSE +++ b/LICENSE @@ -1,4 +1,4 @@ -Copyright 2016-2021 Yandex LLC +Copyright 2016-2021 ClickHouse, Inc. Apache License Version 2.0, January 2004 From 56e40e1ff465ab188bb2ccaf4e0fa630541543c5 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Mon, 20 Sep 2021 18:15:23 +0300 Subject: [PATCH 58/80] ParserSystemQuery fix --- src/Parsers/ASTSystemQuery.cpp | 32 ++++++++++++++++++++++++++++++- src/Parsers/ASTSystemQuery.h | 4 +++- src/Parsers/ParserSystemQuery.cpp | 13 ++++--------- 3 files changed, 38 insertions(+), 11 deletions(-) diff --git a/src/Parsers/ASTSystemQuery.cpp b/src/Parsers/ASTSystemQuery.cpp index 7575f2718df..8305c451de2 100644 --- a/src/Parsers/ASTSystemQuery.cpp +++ b/src/Parsers/ASTSystemQuery.cpp @@ -7,10 +7,40 @@ namespace DB { + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +namespace +{ + std::vector getTypeIndexToTypeName() + { + constexpr std::size_t types_size = magic_enum::enum_count(); + + std::vector type_index_to_type_name; + type_index_to_type_name.resize(types_size); + + auto entries = magic_enum::enum_entries(); + for (const auto & [entry, str] : entries) + type_index_to_type_name[static_cast(entry)] = str; + + return type_index_to_type_name; + } +} + +const char * ASTSystemQuery::typeToString(Type type) +{ + static std::vector type_index_to_type_name = getTypeIndexToTypeName(); + const auto & type_name = type_index_to_type_name[static_cast(type)]; + return type_name.data(); +} + void ASTSystemQuery::formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const { settings.ostr << (settings.hilite ? hilite_keyword : "") << "SYSTEM "; - settings.ostr << type << (settings.hilite ? hilite_none : ""); + settings.ostr << typeToString(type) << (settings.hilite ? 
hilite_none : ""); auto print_database_table = [&] { diff --git a/src/Parsers/ASTSystemQuery.h b/src/Parsers/ASTSystemQuery.h index f55ccc59160..f91a02abf8d 100644 --- a/src/Parsers/ASTSystemQuery.h +++ b/src/Parsers/ASTSystemQuery.h @@ -15,7 +15,7 @@ class ASTSystemQuery : public IAST, public ASTQueryWithOnCluster { public: - enum class Type + enum class Type : UInt64 { UNKNOWN, SHUTDOWN, @@ -64,6 +64,8 @@ public: END }; + static const char * typeToString(Type type); + Type type = Type::UNKNOWN; String target_model; diff --git a/src/Parsers/ParserSystemQuery.cpp b/src/Parsers/ParserSystemQuery.cpp index 5381566263e..5aab3e040e1 100644 --- a/src/Parsers/ParserSystemQuery.cpp +++ b/src/Parsers/ParserSystemQuery.cpp @@ -70,20 +70,15 @@ bool ParserSystemQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected & bool found = false; - // If query is executed on single replica, we want to parse input like FLUSH DISTRIBUTED - // If query is executed on cluster, we also want to parse serialized input like FLUSH_DISTRIBUTED - for (const auto & [entry, str] : magic_enum::enum_entries()) + for (const auto & type : magic_enum::enum_values()) { - String underscore_to_space(str); - std::replace(underscore_to_space.begin(), underscore_to_space.end(), '_', ' '); - - if (ParserKeyword(underscore_to_space).ignore(pos, expected) || ParserKeyword(str).ignore(pos, expected)) + if (ParserKeyword{ASTSystemQuery::typeToString(type)}.ignore(pos, expected)) { - res->type = entry; + res->type = type; found = true; break; } - } + } if (!found) return false; From 7fd7cb72fa2482db4f7316a240dd2f964c12baf8 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Mon, 20 Sep 2021 19:36:15 +0300 Subject: [PATCH 59/80] Update press.html --- website/templates/company/press.html | 49 +++++++++++++++++++++++----- 1 file changed, 41 insertions(+), 8 deletions(-) diff --git a/website/templates/company/press.html b/website/templates/company/press.html index dc6f0f74cf0..8c2c06043b1 100644 --- a/website/templates/company/press.html +++ b/website/templates/company/press.html @@ -1,4 +1,3 @@ -{##

@@ -10,26 +9,60 @@
-
-
+
+

{{ _('9/20/21') }}

- {{ _('ClickHouse, Inc. Announces Incorporation, Along With $50M In Series A Funding') }}
+ {{ _('ClickHouse, Inc. Announces Incorporation, Along With $50M In Series A Funding') }}

-

+

{{ _('New financing will allow the open source success to build a world-class, commercial-grade cloud solution that’s secure, compliant, and convenient for any customer to use.') }}

- {{ _('Read More') }} - + {{ _('Read More') }} +
+
+

+ {{ _('9/20/21') }} +

+

+ {{ _('Business Insider Exclusive') }} +

+

+ {{ _('The creators of the popular ClickHouse project just raised $50 million from Index and Benchmark to form a company that will take on Splunk and Druid in the white-hot data space.') }} +

+ {{ _('Read More') }} +
+
+

+ {{ _('9/20/21') }} +

+

+ {{ _('Index Ventures Perspective') }} +

+

+ {{ _('Our road to ClickHouse started like a good spy novel, with a complex series of introductions over Telegram, clandestine text conversations spanning months before we finally managed to meet the team “face-to-face” (aka over Zoom).') }} +

+ {{ _('Read More') }}
+
+

+ {{ _('9/20/21') }} +

+

+ {{ _('Yandex Perspective') }} +

+

+ {{ _('Yandex announces the spin off of ClickHouse, Inc., a pioneering company focused on open-source column-oriented database management systems (DBMS) for analytical processing. ') }} +

+ {{ _('Read More') }} +
-##} From e4124e00c9dc9a1ce79bfac78400f48d1788817c Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Mon, 20 Sep 2021 19:36:52 +0300 Subject: [PATCH 60/80] Update press.html --- website/templates/company/press.html | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/website/templates/company/press.html b/website/templates/company/press.html index 8c2c06043b1..13f41f58190 100644 --- a/website/templates/company/press.html +++ b/website/templates/company/press.html @@ -54,12 +54,12 @@ {{ _('9/20/21') }}

- {{ _('Yandex Perspective') }}
+ {{ _('Yandex Perspective') }}

{{ _('Yandex announces the spin off of ClickHouse, Inc., a pioneering company focused on open-source column-oriented database management systems (DBMS) for analytical processing. ') }}

- {{ _('Read More') }}
+ {{ _('Read More') }}
From 4087de54390b64bf89d607689240a2cb9fe7e747 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Mon, 20 Sep 2021 19:44:00 +0300 Subject: [PATCH 61/80] follow up to 28373 --- src/Interpreters/ExternalDictionariesLoader.cpp | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/src/Interpreters/ExternalDictionariesLoader.cpp b/src/Interpreters/ExternalDictionariesLoader.cpp index fdd371c5038..bf2ce9e66ee 100644 --- a/src/Interpreters/ExternalDictionariesLoader.cpp +++ b/src/Interpreters/ExternalDictionariesLoader.cpp @@ -106,9 +106,11 @@ std::string ExternalDictionariesLoader::resolveDictionaryNameFromDatabaseCatalog /// Try to split name and get id from associated StorageDictionary. /// If something went wrong, return name as is. + String res = name; + auto qualified_name = QualifiedTableName::tryParseFromString(name); if (!qualified_name) - return name; + return res; if (qualified_name->database.empty()) { @@ -116,9 +118,10 @@ std::string ExternalDictionariesLoader::resolveDictionaryNameFromDatabaseCatalog /// or it's an XML dictionary. bool is_xml_dictionary = has(name); if (is_xml_dictionary) - return name; - else - qualified_name->database = current_database_name; + return res; + + qualified_name->database = current_database_name; + res = current_database_name + '.' + name; } auto [db, table] = DatabaseCatalog::instance().tryGetDatabaseAndTable( @@ -126,13 +129,13 @@ std::string ExternalDictionariesLoader::resolveDictionaryNameFromDatabaseCatalog const_pointer_cast(getContext())); if (!db) - return name; + return res; assert(table); if (db->getUUID() == UUIDHelpers::Nil) - return name; + return res; if (table->getName() != "Dictionary") - return name; + return res; return toString(table->getStorageID().uuid); } From e9aa12c91492687f99e4fdff18ab0afbeedd7b94 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Mon, 20 Sep 2021 20:43:58 +0300 Subject: [PATCH 62/80] Update website.py --- docs/tools/website.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/tools/website.py b/docs/tools/website.py index 784df35b0b7..5e4f48e3441 100644 --- a/docs/tools/website.py +++ b/docs/tools/website.py @@ -242,7 +242,7 @@ def minify_website(args): js_in = get_js_in(args) js_out = f'{args.output_dir}/js/base.js' - if args.minify: + if args.minify and False: # TODO: return closure js_in = [js[1:-1] for js in js_in] closure_args = [ '--js', *js_in, '--js_output_file', js_out, From c5556b5e04e0f99bd49839413378ece3f5100644 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Mon, 20 Sep 2021 20:52:25 +0300 Subject: [PATCH 63/80] Update build.py --- docs/tools/build.py | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/tools/build.py b/docs/tools/build.py index 025cf348c1f..3ea6d3e38c7 100755 --- a/docs/tools/build.py +++ b/docs/tools/build.py @@ -203,6 +203,7 @@ if __name__ == '__main__': arg_parser.add_argument('--verbose', action='store_true') args = arg_parser.parse_args() + args.minify = False # TODO remove logging.basicConfig( level=logging.DEBUG if args.verbose else logging.INFO, From 494ec4aecca9bf3b4412e7f1b94a56ea04fa7637 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Mon, 20 Sep 2021 21:33:25 +0300 Subject: [PATCH 64/80] Fixed tests --- src/Parsers/ASTSystemQuery.cpp | 13 ++++++------- src/Parsers/ParserSystemQuery.cpp | 4 ++-- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/src/Parsers/ASTSystemQuery.cpp b/src/Parsers/ASTSystemQuery.cpp index 8305c451de2..f50bcf5041f 100644 --- a/src/Parsers/ASTSystemQuery.cpp +++ 
b/src/Parsers/ASTSystemQuery.cpp @@ -3,16 +3,11 @@ #include #include +#include namespace DB { - -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; -} - namespace { std::vector getTypeIndexToTypeName() @@ -24,7 +19,11 @@ namespace auto entries = magic_enum::enum_entries(); for (const auto & [entry, str] : entries) - type_index_to_type_name[static_cast(entry)] = str; + { + auto str_copy = String(str); + std::replace(str_copy.begin(), str_copy.end(), '_', ' '); + type_index_to_type_name[static_cast(entry)] = std::move(str_copy); + } return type_index_to_type_name; } diff --git a/src/Parsers/ParserSystemQuery.cpp b/src/Parsers/ParserSystemQuery.cpp index 5aab3e040e1..81afdad9a6e 100644 --- a/src/Parsers/ParserSystemQuery.cpp +++ b/src/Parsers/ParserSystemQuery.cpp @@ -6,7 +6,7 @@ #include #include -#include +#include #include namespace ErrorCodes @@ -78,7 +78,7 @@ bool ParserSystemQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected & found = true; break; } - } + } if (!found) return false; From b8e2dc6b3e405bef7df959e5b5b95f2d36b15f0c Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Tue, 21 Sep 2021 00:12:56 +0300 Subject: [PATCH 65/80] Update clickhouse-keeper.md --- docs/ru/operations/clickhouse-keeper.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/ru/operations/clickhouse-keeper.md b/docs/ru/operations/clickhouse-keeper.md index 3a724fc3d35..14d95ebae68 100644 --- a/docs/ru/operations/clickhouse-keeper.md +++ b/docs/ru/operations/clickhouse-keeper.md @@ -94,7 +94,7 @@ ClickHouse Keeper может использоваться как равноце ## Как запустить -ClickHouse Keeper входит в пакет` clickhouse-server`, просто добавьте кофигурацию `` и запустите сервер ClickHouse как обычно. Если вы хотите запустить ClickHouse Keeper автономно, сделайте это аналогичным способом: +ClickHouse Keeper входит в пакет `clickhouse-server`, просто добавьте кофигурацию `` и запустите сервер ClickHouse как обычно. Если вы хотите запустить ClickHouse Keeper автономно, сделайте это аналогичным способом: ```bash clickhouse-keeper --config /etc/your_path_to_config/config.xml --daemon @@ -116,4 +116,4 @@ clickhouse-keeper-converter --zookeeper-logs-dir /var/lib/zookeeper/version-2 -- 4. Скопируйте снэпшот на узлы сервера ClickHouse с настроенным `keeper` или запустите ClickHouse Keeper вместо ZooKeeper. Снэпшот должен сохраняться на всех узлах: в противном случае пустые узлы могут захватить лидерство и сконвертированные данные могут быть отброшены на старте. -[Original article](https://clickhouse.com/docs/en/operations/clickhouse-keeper/) \ No newline at end of file +[Original article](https://clickhouse.com/docs/en/operations/clickhouse-keeper/) From 130552330f8eaf566c76271eaa41269e9bd54ba0 Mon Sep 17 00:00:00 2001 From: Eugene Konkov Date: Tue, 21 Sep 2021 00:58:22 +0300 Subject: [PATCH 66/80] Fix error message Fix typo in error message. Better wording. --- programs/copier/ClusterCopier.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/programs/copier/ClusterCopier.cpp b/programs/copier/ClusterCopier.cpp index de26e34bf2e..8c52069dbad 100644 --- a/programs/copier/ClusterCopier.cpp +++ b/programs/copier/ClusterCopier.cpp @@ -1964,7 +1964,7 @@ UInt64 ClusterCopier::executeQueryOnCluster( } catch (...) 
{ - LOG_WARNING(log, "Seemns like node with address {} is unreachable.", node.host_name); + LOG_WARNING(log, "It looks like node with address {} is unreachable.", node.host_name); continue; } From 6f9fedd40e3f91329772850d1528ebec5148b44a Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 21 Sep 2021 01:03:30 +0300 Subject: [PATCH 67/80] Change Benchmark logo --- .../images/logos/logo-benchmark-capital.png | Bin 3090 -> 6251 bytes website/templates/company/investors.html | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/website/images/logos/logo-benchmark-capital.png b/website/images/logos/logo-benchmark-capital.png index 38bbce23e0ea1a0890b477513836f603015df0d3..2587c675c9166a252bb6dfb1c1154d001ea207bd 100644 GIT binary patch literal 6251 zcmd5=`y|G;NIym#Mw?|r}T>$YiiqsI55{rgJHhpXwtxt}_TIF+79%2Z0Nglme}~+^)tMq93Prc_Mgj0A5qnumu`D3w&u7c(A(bEHKF8We2qdQqo zBTx1$xZwlFbKqvT$t#gteB8DSc-7ql0c{+Qw*6rEQst+cY z4${F@`4IOcnccn?2gaLs9jc{cY>t)a$aQsebl48<0?&@GFY}c4BP6eb$xYG!e%=!D zzjed9NlD}-LV)xwP`58YwpXazSM}P-*tGlAGr8JwC|39ghvVLsBO>BfdzikMkSx@T zGgdalFv6Nr=~1A< z-bgYfbvs3X_Vp#RBMcOqU_>9?17aC0|8N6EUR)b({FVt86I$``t=^3_$neMr;#_Ix z8BD6bd8%+Y#9&&0OBE*aMp0`A&DUoAx3#iOmkqjmdIA*?-hdnA&qw{BA8Fwuq9TWV$V%+1U=BO}w(uS{@(lT*e$ z^0NYD9&J3X4ZZrn$^<*9ZaG-TQ8BwHMsdg>x6_d(HG*IllD(Cm@6zRzOv75%q4h3H#$z2n#)WkIkJAW@R-(m)?uyaAR$c=kLeAdR%#7GKB(sHNKdjRir4D6=rlPa7M7sVB`!Dz4Yx1>gwya z1@);qk3JR72#{Ju>pb%44{u5Bh#^gCUlhY&Wn~4`Z0XBXG#yucqJwohRoJGVn4Cg0!}= zKuO(Lju7A*f&UT{TZ%olO2jfc8dYMNU`KOoXV-13dP6=2C%}}xoagmE^&s2{KKc-< zaX625K0dfu{dB&aUBL$*GK|<8@&yig)~ChWO;Quju^s>Q%QXfk(qHX*x(V}Ar3uFS z{{8#n;w@jj`x9_N3HE-APo(c?NSt2bdzF}~-f01nDU9R2-}{^}I6Ks;e<16<^>It3 z@N=cd^Y3Sqcl*@@NgX(FYWwTL%1Qf~xVCmb=fxEwlbgJ?E*PifVZt8oWH90^%*}!8 zLkynBS$Vi>6-ht%(3tX-2@}OS;Rk0Y{(P1~8C_V(djDsOk1o246Nz4<9Hjr=3oG34 zp!kakB}R9GR1%$pQ-8&p11bJdk)%o8@U8~|j?`aY|Ak$9pLM|7+gp#-DzH=y+ojh{GjlY&$>GIghi|BC%-BtTK`Vy+ERz*c+F0O5G*1hf6<(XI$ zPfyS6+Hif~`3F=3@&6qeN{Z zjV^3+@wm;DZPTQg<>lp(w|6#{d2Iak2185)WB*pZDdzr-BaD=|Z)h^SW;=|`X880@ zhctJxSo*&nksvwK4xWmUTD7bVeW1#gjp?3SFg$Tz#)n`bej>C z!Zm9=6y$Kz`L8b8a$Krsy&uuR97ZGoA)E#!ts>MgRE;!Sk5*_`{d(9$ew?6jLaB_oz&bd;<@eES;puD&- zeIW+lwJ@`Sbt-jtyNblUfhKHJjzMu)V~wa`E4_1J<%|?0ETEtw4lNtC^;n|jjP^hxY9I3msq@<)) zhaKt~;5B)0&HZO7Or~3_?Fpxo2T3G zq7p_t*Rime@M8`?e$sSq*HbGH6iLymLjeuO%DJ6-3VAeG{Xm^prZW*){E$YYSqN7% z%ukB$OmoqEYKyACSHtT(2ur+6kRE3|vzPac6X|(tyH_3++1`^N$W#UPAkWb z3>6Kt*-1kvn}rS27ux|J8{*^RLnV_y#>_qUAP^gtHzs?JoK^W0E1v#yqD$VS8z9Vy z{QFwG(n7=Z7Qgyah4!tRv?+dVpNiwYtoN}c$FNQ@ZEvw+Bv!b`gtp=15IOVq^9WLD zT$wDn-OcO3Ao(er<6ptyhX37h9J zT>VZqB_q*Txfw=GHcw4S0hNq<_?m&@#8~^_ag)bpiaD#wpTUnyX_5;UEvCP4f-V({ z@&5?ahjw==HB<=y{S(4G6xu50z07%tSV6Xfs)6(s1RQ-}onDj4zLeP z!X}q;sVZOHo-}ytuRo5#Dr4TKtWBUVP|nGrZuPeSw7PWZ61`&*BzR#bgqB?EbZg-6 zfzsY*P>m|DzwE4lm5j6d(!YVSUrPU>7SZm9<3mv%1Zued@>+6Xt#j6^er(`;tO;~G z93)lb&pYb`4u{i;gVqlcJP5zyf{Fi5M7Q|lIzb5Logiho%cgCdy4NDbR&PZ4o7V%B zrLw+vt5)1RKbvu8YnwH}1n)sBFM4*_S1 z(6mslSa5@vUptcO{h&BeGbPzwKF4-@F)tM&Pf6KY=Q2;Z4^@eG&@YtTgZi~{A~j0D zT9bPb2xT^cbawVZg3>3%J9uz#a9q))JepO;1sRE3ue$73x6`1O2gNch>j#sTvo@D` z-naRAQbY{2#!I`VGT$&|ss!w#$e~&&T%nAPjD!*HAI&mDj8Uf2^yM#Nr97)mWP3&| zA^ePyn$o?dg#J%UJ5Sn!C?)wfyJ$k{p;o!M_Rqft0;Hx}{g|5vv>$5hcyUe@! 
From 6f9fedd40e3f91329772850d1528ebec5148b44a Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Tue, 21 Sep 2021 01:03:30 +0300
Subject: [PATCH 67/80] Change Benchmark logo

---
 .../images/logos/logo-benchmark-capital.png | Bin 3090 -> 6251 bytes
 website/templates/company/investors.html    |   2 +-
 2 files changed, 1 insertion(+), 1 deletion(-)

diff --git a/website/images/logos/logo-benchmark-capital.png b/website/images/logos/logo-benchmark-capital.png
index 38bbce23e0ea1a0890b477513836f603015df0d3..2587c675c9166a252bb6dfb1c1154d001ea207bd 100644
GIT binary patch
literal 6251
[base64 binary patch data omitted]

diff --git a/website/templates/company/investors.html b/website/templates/company/investors.html
index 4d6224e7603..0dd38085c83 100644
--- a/website/templates/company/investors.html
+++ b/website/templates/company/investors.html
@@ -14,7 +14,7 @@

{{ _('Board Member') }}

- +
From bf79920d2d18bf128111e30694490392df17e938 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Tue, 21 Sep 2021 01:11:08 +0300
Subject: [PATCH 68/80] Update benchmark logo (now transparent)

---
 .../images/logos/logo-benchmark-capital.png | Bin 6251 -> 3154 bytes
 1 file changed, 0 insertions(+), 0 deletions(-)

diff --git a/website/images/logos/logo-benchmark-capital.png b/website/images/logos/logo-benchmark-capital.png
index 2587c675c9166a252bb6dfb1c1154d001ea207bd..626599c6b35bf68388df4ab09ed0e26aaa878e8e 100644
GIT binary patch
literal 3154
[base64 binary patch data omitted]

literal 6251
[base64 binary patch data omitted]
Date: Tue, 21 Sep 2021 01:19:40 +0300
Subject: [PATCH 69/80] Update programs/copier/ClusterCopier.cpp

---
 programs/copier/ClusterCopier.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/programs/copier/ClusterCopier.cpp b/programs/copier/ClusterCopier.cpp
index 8c52069dbad..1e8222f8769 100644
--- a/programs/copier/ClusterCopier.cpp
+++ b/programs/copier/ClusterCopier.cpp
@@ -1964,7 +1964,7 @@ UInt64 ClusterCopier::executeQueryOnCluster(
     }
     catch (...)
     {
-        LOG_WARNING(log, "It looks like node with address {} is unreachable.", node.host_name);
+        LOG_WARNING(log, "Node with address {} seems to be unreachable.", node.host_name);
         continue;
     }

From ad2052e8e52b2ee33f0d214b0800c64dc2b60c7f Mon Sep 17 00:00:00 2001
From: Maksim Kita
Date: Tue, 21 Sep 2021 02:23:34 +0300
Subject: [PATCH 70/80] Added comments

---
 src/Parsers/ASTSystemQuery.cpp | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/Parsers/ASTSystemQuery.cpp b/src/Parsers/ASTSystemQuery.cpp
index f50bcf5041f..ba8e49b98ca 100644
--- a/src/Parsers/ASTSystemQuery.cpp
+++ b/src/Parsers/ASTSystemQuery.cpp
@@ -31,6 +31,9 @@ namespace

 const char * ASTSystemQuery::typeToString(Type type)
 {
+    /** During parsing, if a SystemQuery is not parsed properly, its type name is added to the Expected variants
+      * as a description (see IParser.h). The description string must therefore be statically allocated.
+      */
     static std::vector type_index_to_type_name = getTypeIndexToTypeName();
     const auto & type_name = type_index_to_type_name[static_cast(type)];
     return type_name.data();
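The comment this patch adds is about lifetime: the Expected mechanism stores raw `const char *` descriptions without copying them, so `typeToString` must return pointers into storage that outlives the parse — hence the function-local `static` table. Here is a sketch of the idea under simplified assumptions: a hand-rolled three-value `Type` and a hard-coded name list stand in for the real enum and for `magic_enum::enum_entries()`.

```cpp
#include <algorithm>
#include <cassert>
#include <cstring>
#include <string>
#include <vector>

enum class Type { STOP_MERGES, FLUSH_LOGS, RELOAD_CONFIG };

// Build "STOP_MERGES" -> "STOP MERGES" style display names once.
// (Patch 64 derives the names via magic_enum::enum_entries(); this list is a stand-in.)
static std::vector<std::string> buildTypeIndexToTypeName()
{
    std::vector<std::string> names{"STOP_MERGES", "FLUSH_LOGS", "RELOAD_CONFIG"};
    for (auto & name : names)
        std::replace(name.begin(), name.end(), '_', ' ');
    return names;
}

const char * typeToString(Type type)
{
    /// Function-local static: initialized once, alive until program exit,
    /// so the returned pointer stays valid for as long as anything holds it.
    static std::vector<std::string> type_index_to_type_name = buildTypeIndexToTypeName();
    return type_index_to_type_name[static_cast<size_t>(type)].data();
}

int main()
{
    const char * description = typeToString(Type::FLUSH_LOGS);
    assert(std::strcmp(description, "FLUSH LOGS") == 0); // underscores became spaces
    return 0;
}
```

If the table were a plain local, the pointer returned by `.data()` would dangle the moment the function returned; the `static` keeps the strings alive for the program's lifetime, which also makes the underscore-to-space rewrite from patch 64 safe to do once at initialization.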
From 2de35420323db0adde8555f1e12b24f007aa104a Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Tue, 21 Sep 2021 03:22:45 +0300
Subject: [PATCH 71/80] Change copyright, step 2

---
 docs/tools/blog.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/tools/blog.py b/docs/tools/blog.py
index d0f2496f914..bfc8c0908e9 100644
--- a/docs/tools/blog.py
+++ b/docs/tools/blog.py
@@ -51,7 +51,7 @@ def build_for_lang(lang, args):
     if args.htmlproofer:
         plugins.append('htmlproofer')

-    website_url = 'https://clickhouse.tech'
+    website_url = 'https://clickhouse.com'
     site_name = site_names.get(lang, site_names['en'])
     blog_nav, post_meta = nav.build_blog_nav(lang, args)
     raw_config = dict(
@@ -62,7 +62,7 @@ def build_for_lang(lang, args):
         strict=True,
         theme=theme_cfg,
         nav=blog_nav,
-        copyright='©2016–2021 Yandex LLC',
+        copyright='©2016–2021 ClickHouse, Inc.',
         use_directory_urls=True,
         repo_name='ClickHouse/ClickHouse',
         repo_url='https://github.com/ClickHouse/ClickHouse/',

From 51e45050c1e915e7a334c5860f5a704742319b7f Mon Sep 17 00:00:00 2001
From: bharatnc
Date: Mon, 20 Sep 2021 18:21:48 -0700
Subject: [PATCH 72/80] Update outdated ya.make files

This runs the `utils/generate-ya-make/generate-ya-make.sh` script to
bring the outdated ya.make files up to date. It is unclear why they
were not regenerated earlier and how that was missed.

---
 src/Databases/ya.make    | 2 ++
 src/Functions/ya.make    | 1 +
 src/Interpreters/ya.make | 1 +
 src/Processors/ya.make   | 5 +----
 src/Storages/ya.make     | 1 +
 5 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/src/Databases/ya.make b/src/Databases/ya.make
index 34f47a5edf0..d088ba16fe2 100644
--- a/src/Databases/ya.make
+++ b/src/Databases/ya.make
@@ -9,6 +9,7 @@ PEERDIR(

 SRCS(
+    DDLDependencyVisitor.cpp
     DatabaseAtomic.cpp
     DatabaseDictionary.cpp
     DatabaseFactory.cpp
@@ -30,6 +31,7 @@ SRCS(
     SQLite/DatabaseSQLite.cpp
     SQLite/SQLiteUtils.cpp
     SQLite/fetchSQLiteTableStructure.cpp
+    TablesLoader.cpp

 )

diff --git a/src/Functions/ya.make b/src/Functions/ya.make
index 877e342f036..431f279e682 100644
--- a/src/Functions/ya.make
+++ b/src/Functions/ya.make
@@ -304,6 +304,7 @@ SRCS(
     h3IndexesAreNeighbors.cpp
     h3IsValid.cpp
     h3ToChildren.cpp
+    h3ToGeoBoundary.cpp
     h3ToParent.cpp
     h3ToString.cpp
     h3kRing.cpp
diff --git a/src/Interpreters/ya.make b/src/Interpreters/ya.make
index e8b33d09914..da9d3d497eb 100644
--- a/src/Interpreters/ya.make
+++ b/src/Interpreters/ya.make
@@ -28,6 +28,7 @@ SRCS(
     ApplyWithSubqueryVisitor.cpp
     ArithmeticOperationsInAgrFuncOptimize.cpp
     ArrayJoinAction.cpp
+    AsynchronousInsertQueue.cpp
     AsynchronousMetricLog.cpp
     AsynchronousMetrics.cpp
     BloomFilter.cpp
diff --git a/src/Processors/ya.make b/src/Processors/ya.make
index 7d1bf047712..f2063609440 100644
--- a/src/Processors/ya.make
+++ b/src/Processors/ya.make
@@ -28,6 +28,7 @@ SRCS(
     Executors/PollingQueue.cpp
     Executors/PullingAsyncPipelineExecutor.cpp
     Executors/PullingPipelineExecutor.cpp
+    Executors/StreamingFormatExecutor.cpp
     ForkProcessor.cpp
     Formats/IInputFormat.cpp
     Formats/IOutputFormat.cpp
@@ -58,12 +59,8 @@ SRCS(
     Formats/Impl/MySQLOutputFormat.cpp
     Formats/Impl/NullFormat.cpp
     Formats/Impl/ODBCDriver2BlockOutputFormat.cpp
-    Formats/Impl/ORCBlockInputFormat.cpp
-    Formats/Impl/ORCBlockOutputFormat.cpp
     Formats/Impl/ParallelFormattingOutputFormat.cpp
     Formats/Impl/ParallelParsingInputFormat.cpp
-    Formats/Impl/ParquetBlockInputFormat.cpp
-    Formats/Impl/ParquetBlockOutputFormat.cpp
     Formats/Impl/PostgreSQLOutputFormat.cpp
     Formats/Impl/PrettyBlockOutputFormat.cpp
     Formats/Impl/PrettyCompactBlockOutputFormat.cpp
diff --git a/src/Storages/ya.make b/src/Storages/ya.make
index b9eb47e2ab8..c0da9b29382 100644
--- a/src/Storages/ya.make
+++ b/src/Storages/ya.make
@@ -164,6 +164,7 @@ SRCS(
     StorageView.cpp
     StorageXDBC.cpp
     System/StorageSystemAggregateFunctionCombinators.cpp
+    System/StorageSystemAsynchronousInserts.cpp
     System/StorageSystemAsynchronousMetrics.cpp
     System/StorageSystemBuildOptions.cpp
     System/StorageSystemClusters.cpp

From 7b637f853e35262fa5671b59d06e73ab8d7409f2 Mon Sep 17 00:00:00 2001
From: Ivan Blinkov
Date: Tue, 21 Sep 2021 09:45:52 +0300
Subject: [PATCH 73/80] Update press.html

---
 website/templates/company/press.html | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/website/templates/company/press.html b/website/templates/company/press.html
index 13f41f58190..8265b68b063 100644
--- a/website/templates/company/press.html
+++ b/website/templates/company/press.html
@@ -1,4 +1,4 @@
-
+
From 571dd3acfb780e0743db2893de8d0c07512a5ab0 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Tue, 21 Sep 2021 10:04:21 +0300 Subject: [PATCH 74/80] fix style check --- .gitignore | 3 +++ docs/ja/development/cmake-in-clickhouse.md | 1 - docs/ru/development/cmake-in-clickhouse.md | 1 - docs/tools/cmake_in_clickhouse_generator.py | 6 ++++++ docs/zh/development/cmake-in-clickhouse.md | 1 - 5 files changed, 9 insertions(+), 3 deletions(-) delete mode 120000 docs/ja/development/cmake-in-clickhouse.md delete mode 120000 docs/ru/development/cmake-in-clickhouse.md delete mode 120000 docs/zh/development/cmake-in-clickhouse.md diff --git a/.gitignore b/.gitignore index a469ff7bca1..0bf31508419 100644 --- a/.gitignore +++ b/.gitignore @@ -33,6 +33,9 @@ /docs/ja/single.md /docs/fa/single.md /docs/en/development/cmake-in-clickhouse.md +/docs/ja/development/cmake-in-clickhouse.md +/docs/zh/development/cmake-in-clickhouse.md +/docs/ru/development/cmake-in-clickhouse.md # callgrind files callgrind.out.* diff --git a/docs/ja/development/cmake-in-clickhouse.md b/docs/ja/development/cmake-in-clickhouse.md deleted file mode 120000 index 0eb485952cd..00000000000 --- a/docs/ja/development/cmake-in-clickhouse.md +++ /dev/null @@ -1 +0,0 @@ -../../en/development/cmake-in-clickhouse.md \ No newline at end of file diff --git a/docs/ru/development/cmake-in-clickhouse.md b/docs/ru/development/cmake-in-clickhouse.md deleted file mode 120000 index 0eb485952cd..00000000000 --- a/docs/ru/development/cmake-in-clickhouse.md +++ /dev/null @@ -1 +0,0 @@ -../../en/development/cmake-in-clickhouse.md \ No newline at end of file diff --git a/docs/tools/cmake_in_clickhouse_generator.py b/docs/tools/cmake_in_clickhouse_generator.py index 1414ffc4b9e..8b440823df3 100644 --- a/docs/tools/cmake_in_clickhouse_generator.py +++ b/docs/tools/cmake_in_clickhouse_generator.py @@ -155,6 +155,12 @@ def generate_cmake_flags_files() -> None: with open(footer_file_name, "r") as footer: f.write(footer.read()) + other_languages = ["docs/ja/development/cmake-in-clickhouse.md", + "docs/zh/development/cmake-in-clickhouse.md", + "docs/ru/development/cmake-in-clickhouse.md"] + + for lang in other_languages: + os.symlink(output_file_name, os.path.join(root_path, lang)) if __name__ == '__main__': generate_cmake_flags_files() diff --git a/docs/zh/development/cmake-in-clickhouse.md b/docs/zh/development/cmake-in-clickhouse.md deleted file mode 120000 index 0eb485952cd..00000000000 --- a/docs/zh/development/cmake-in-clickhouse.md +++ /dev/null @@ -1 +0,0 @@ -../../en/development/cmake-in-clickhouse.md \ No newline at end of file From e60b863898b0c46b5f37ccdd40a8caecd33120f3 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Tue, 21 Sep 2021 10:43:57 +0300 Subject: [PATCH 75/80] [docs] fix dark theme --- website/css/main.css | 1 - 1 file changed, 1 deletion(-) diff --git a/website/css/main.css b/website/css/main.css index 4e812b8fabc..9b676804eba 100644 --- a/website/css/main.css +++ b/website/css/main.css @@ -908,7 +908,6 @@ img { } ul { - color: #495057; list-style-type: square; padding-left: 1.25em; } From 80cfbec0df75f6c0313230c0051276ad1d4fe156 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Tue, 21 Sep 2021 11:21:12 +0300 Subject: [PATCH 76/80] Update hero.html --- website/templates/index/hero.html | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/website/templates/index/hero.html b/website/templates/index/hero.html index af082e73a0c..873bcf9487a 100644 --- a/website/templates/index/hero.html +++ 
b/website/templates/index/hero.html @@ -12,7 +12,7 @@

@@ -31,7 +31,7 @@

Introducing ClickHouse inc.!
-

ClickHouse, Inc. Announces Incorporation, Along With $50M In Series A Funding. New financing will allow the open source success to build a world-class, commercial-grade cloud solution that’s secure, compliant, and convenient for any customer to use.

Read the Press Release From 6ab715198c0f2aea3918b2ba97bd2b24ae65c17f Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 21 Sep 2021 16:23:04 +0300 Subject: [PATCH 77/80] Update SECURITY.md: change email and the list of versions. --- SECURITY.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/SECURITY.md b/SECURITY.md index 846b7e8239c..0405d5cf8fc 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -28,15 +28,16 @@ The following versions of ClickHouse server are currently being supported with s | 21.3 | ✅ | | 21.4 | :x: | | 21.5 | :x: | -| 21.6 | ✅ | +| 21.6 | :x: | | 21.7 | ✅ | | 21.8 | ✅ | +| 21.9 | ✅ | ## Reporting a Vulnerability We're extremely grateful for security researchers and users that report vulnerabilities to the ClickHouse Open Source Community. All reports are thoroughly investigated by developers. -To report a potential vulnerability in ClickHouse please send the details about it to [clickhouse-feedback@yandex-team.com](mailto:clickhouse-feedback@yandex-team.com). +To report a potential vulnerability in ClickHouse please send the details about it to [security@clickhouse.com](mailto:security@clickhouse.com). ### When Should I Report a Vulnerability? From d0721b87ab2d4d1200d0506ad8c408b909e3d241 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Tue, 21 Sep 2021 17:06:59 +0300 Subject: [PATCH 78/80] Update sitemap-static.xml --- website/sitemap-static.xml | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/website/sitemap-static.xml b/website/sitemap-static.xml index 6d6b41e5827..b5b5f3aa0d5 100644 --- a/website/sitemap-static.xml +++ b/website/sitemap-static.xml @@ -1,19 +1,23 @@ - https://clickhouse.tech/ + https://clickhouse.com/ daily - - https://clickhouse.tech/benchmark/dbms/ + + https://clickhouse.com/company/ weekly - https://clickhouse.tech/benchmark/hardware/ + https://clickhouse.com/benchmark/dbms/ weekly - https://clickhouse.tech/codebrowser/html_report/ClickHouse/index.html + https://clickhouse.com/benchmark/hardware/ + weekly + + + https://clickhouse.com/codebrowser/html_report/ClickHouse/index.html daily From 9ecf9e5ba08814907f1651c0a63d9f3808c5f5a7 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Tue, 21 Sep 2021 17:07:19 +0300 Subject: [PATCH 79/80] Update robots.txt --- website/robots.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/website/robots.txt b/website/robots.txt index 2cecc12e311..f7d6bd76a33 100644 --- a/website/robots.txt +++ b/website/robots.txt @@ -1,5 +1,5 @@ User-agent: * Disallow: /cdn-cgi/ Allow: / -Host: https://clickhouse.tech -Sitemap: https://clickhouse.tech/sitemap-index.xml +Host: https://clickhouse.com +Sitemap: https://clickhouse.com/sitemap-index.xml From 0f8798106f376e3ebf5b2073da06c0003aa85fdb Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Tue, 21 Sep 2021 17:07:59 +0300 Subject: [PATCH 80/80] Update sitemap-index.xml --- website/sitemap-index.xml | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/website/sitemap-index.xml b/website/sitemap-index.xml index 3fbdd99d372..bcaf6a3fe19 100644 --- a/website/sitemap-index.xml +++ b/website/sitemap-index.xml @@ -1,24 +1,24 @@ - https://clickhouse.tech/docs/en/sitemap.xml + https://clickhouse.com/docs/en/sitemap.xml - https://clickhouse.tech/docs/zh/sitemap.xml + https://clickhouse.com/docs/zh/sitemap.xml - https://clickhouse.tech/docs/ru/sitemap.xml + https://clickhouse.com/docs/ru/sitemap.xml - https://clickhouse.tech/docs/ja/sitemap.xml + 
https://clickhouse.com/docs/ja/sitemap.xml - https://clickhouse.tech/blog/en/sitemap.xml + https://clickhouse.com/blog/en/sitemap.xml - https://clickhouse.tech/blog/ru/sitemap.xml + https://clickhouse.com/blog/ru/sitemap.xml - https://clickhouse.tech/sitemap-static.xml + https://clickhouse.com/sitemap-static.xml